aboutsummaryrefslogtreecommitdiffstats
path: root/lib/CodeGen/SelectionDAG
diff options
context:
space:
mode:
authorStephen Hines <srhines@google.com>2014-04-23 16:57:46 -0700
committerStephen Hines <srhines@google.com>2014-04-24 15:53:16 -0700
commit36b56886974eae4f9c5ebc96befd3e7bfe5de338 (patch)
treee6cfb69fbbd937f450eeb83bfb83b9da3b01275a /lib/CodeGen/SelectionDAG
parent69a8640022b04415ae9fac62f8ab090601d8f889 (diff)
downloadexternal_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.gz
external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.bz2
external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.zip
Update to LLVM 3.5a.
Change-Id: Ifadecab779f128e62e430c2b4f6ddd84953ed617
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
-rw-r--r--lib/CodeGen/SelectionDAG/Android.mk2
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp1277
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp158
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp67
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp31
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp386
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp21
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h5
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp8
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp53
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp71
-rw-r--r--lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp8
-rw-r--r--lib/CodeGen/SelectionDAG/SDNodeDbgValue.h2
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp9
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp44
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h6
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp170
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp405
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h20
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp21
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp69
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp3
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp97
-rw-r--r--lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp2
27 files changed, 2046 insertions, 899 deletions
diff --git a/lib/CodeGen/SelectionDAG/Android.mk b/lib/CodeGen/SelectionDAG/Android.mk
index 3f28e08029..0e52ee3609 100644
--- a/lib/CodeGen/SelectionDAG/Android.mk
+++ b/lib/CodeGen/SelectionDAG/Android.mk
@@ -41,6 +41,7 @@ include $(BUILD_HOST_STATIC_LIBRARY)
# For the device
# =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(codegen_selectiondag_SRC_FILES)
@@ -52,3 +53,4 @@ LOCAL_MODULE_TAGS := optional
include $(LLVM_DEVICE_BUILD_MK)
include $(LLVM_GEN_INTRINSICS_MK)
include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 43f72c5ef9..cc0c5fa076 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -50,11 +50,28 @@ STATISTIC(SlicedLoads, "Number of load sliced");
namespace {
static cl::opt<bool>
CombinerAA("combiner-alias-analysis", cl::Hidden,
- cl::desc("Turn on alias analysis during testing"));
+ cl::desc("Enable DAG combiner alias-analysis heuristics"));
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
- cl::desc("Include global information in alias analysis"));
+ cl::desc("Enable DAG combiner's use of IR alias analysis"));
+
+// FIXME: Enable the use of TBAA. There are two known issues preventing this:
+// 1. Stack coloring does not update TBAA when merging allocas
+// 2. CGP inserts ptrtoint/inttoptr pairs when sinking address computations.
+// Because BasicAA does not handle inttoptr, we'll often miss basic type
+// punning idioms that we need to catch so we don't miscompile real-world
+// code.
+ static cl::opt<bool>
+ UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(false),
+ cl::desc("Enable DAG combiner's use of TBAA"));
+
+#ifndef NDEBUG
+ static cl::opt<std::string>
+ CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
+ cl::desc("Only use DAG-combiner alias analysis in this"
+ " function"));
+#endif
/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
@@ -212,6 +229,7 @@ namespace {
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
+ SDValue visitRotate(SDNode *N);
SDValue visitCTLZ(SDNode *N);
SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTTZ(SDNode *N);
@@ -257,11 +275,12 @@ namespace {
SDValue visitCONCAT_VECTORS(SDNode *N);
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
+ SDValue visitINSERT_SUBVECTOR(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
- SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
+ SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
@@ -271,6 +290,11 @@ namespace {
bool NotExtCompare = false);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
SDLoc DL, bool foldBooleans = true);
+
+ bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC) const;
+ bool isOneUseSetCC(SDValue N) const;
+
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
@@ -280,6 +304,10 @@ namespace {
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
+ SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
+ SDValue InnerPos, SDValue InnerNeg,
+ unsigned PosOpcode, unsigned NegOpcode,
+ SDLoc DL);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
@@ -326,6 +354,14 @@ namespace {
/// \return True if some memory operations were changed.
bool MergeConsecutiveStores(StoreSDNode *N);
+ /// \brief Try to transform a truncation where C is a constant:
+ /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
+ ///
+ /// \p N needs to be a truncation and its first operand an AND. Other
+ /// requirements are checked by the function (e.g. that trunc is
+ /// single-use) and if missed an empty SDValue is returned.
+ SDValue distributeTruncateThroughAnd(SDNode *N);
+
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
@@ -378,7 +414,7 @@ public:
explicit WorkListRemover(DAGCombiner &dc)
: SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
- virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ void NodeDeleted(SDNode *N, SDNode *E) override {
DC.removeFromWorkList(N);
}
};
@@ -566,79 +602,121 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
}
-
// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
-// that selects between the values 1 and 0, making it equivalent to a setcc.
-// Also, set the incoming LHS, RHS, and CC references to the appropriate
-// nodes based on the type of node we are checking. This simplifies life a
-// bit for the callers.
-static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
- SDValue &CC) {
+// that selects between the target values used for true and false, making it
+// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
+// the appropriate nodes based on the type of node we are checking. This
+// simplifies life a bit for the callers.
+bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC) const {
if (N.getOpcode() == ISD::SETCC) {
LHS = N.getOperand(0);
RHS = N.getOperand(1);
CC = N.getOperand(2);
return true;
}
- if (N.getOpcode() == ISD::SELECT_CC &&
- N.getOperand(2).getOpcode() == ISD::Constant &&
- N.getOperand(3).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
- cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
- LHS = N.getOperand(0);
- RHS = N.getOperand(1);
- CC = N.getOperand(4);
- return true;
- }
- return false;
+
+ if (N.getOpcode() != ISD::SELECT_CC ||
+ !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
+ !TLI.isConstFalseVal(N.getOperand(3).getNode()))
+ return false;
+
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(4);
+ return true;
}
// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
// one use. If this is true, it allows the users to invert the operation for
// free when it is profitable to do so.
-static bool isOneUseSetCC(SDValue N) {
+bool DAGCombiner::isOneUseSetCC(SDValue N) const {
SDValue N0, N1, N2;
if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
return true;
return false;
}
+/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
+/// elements are all the same constant or undefined.
+static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
+ BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
+ if (!C)
+ return false;
+
+ APInt SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs) &&
+ EltVT.getSizeInBits() >= SplatBitSize);
+}
+
+// \brief Returns the SDNode if it is a constant BuildVector or constant.
+static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) {
+ if (isa<ConstantSDNode>(N))
+ return N.getNode();
+ BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
+ if(BV && BV->isConstant())
+ return BV;
+ return NULL;
+}
+
+// \brief Returns the SDNode if it is a constant splat BuildVector or constant
+// int.
+static ConstantSDNode *isConstOrConstSplat(SDValue N) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N))
+ return BV->getConstantSplatValue();
+
+ return nullptr;
+}
+
SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
SDValue N0, SDValue N1) {
EVT VT = N0.getValueType();
- if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
- if (isa<ConstantSDNode>(N1)) {
- // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
- SDValue OpNode =
- DAG.FoldConstantArithmetic(Opc, VT,
- cast<ConstantSDNode>(N0.getOperand(1)),
- cast<ConstantSDNode>(N1));
- return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
- }
- if (N0.hasOneUse()) {
- // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT,
- N0.getOperand(0), N1);
- AddToWorkList(OpNode.getNode());
- return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
- }
- }
-
- if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
- if (isa<ConstantSDNode>(N0)) {
- // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
- SDValue OpNode =
- DAG.FoldConstantArithmetic(Opc, VT,
- cast<ConstantSDNode>(N1.getOperand(1)),
- cast<ConstantSDNode>(N0));
- return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
- }
- if (N1.hasOneUse()) {
- // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT,
- N1.getOperand(0), N0);
- AddToWorkList(OpNode.getNode());
- return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
+ if (N0.getOpcode() == Opc) {
+ if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) {
+ if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) {
+ // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
+ SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R);
+ if (!OpNode.getNode())
+ return SDValue();
+ return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ }
+ if (N0.hasOneUse()) {
+ // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
+ // use
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
+ if (!OpNode.getNode())
+ return SDValue();
+ AddToWorkList(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+ }
+ }
+ }
+
+ if (N1.getOpcode() == Opc) {
+ if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) {
+ if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) {
+ // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
+ SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L);
+ if (!OpNode.getNode())
+ return SDValue();
+ return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+ }
+ if (N1.hasOneUse()) {
+ // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one
+ // use
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0);
+ if (!OpNode.getNode())
+ return SDValue();
+ AddToWorkList(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
+ }
}
}
@@ -1148,6 +1226,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SHL: return visitSHL(N);
case ISD::SRA: return visitSRA(N);
case ISD::SRL: return visitSRL(N);
+ case ISD::ROTR:
+ case ISD::ROTL: return visitRotate(N);
case ISD::CTLZ: return visitCTLZ(N);
case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
case ISD::CTTZ: return visitCTTZ(N);
@@ -1193,6 +1273,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
}
return SDValue();
}
@@ -1507,8 +1588,10 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
+ if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
+ }
}
}
@@ -1778,22 +1861,6 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) {
return SDValue();
}
-/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
-/// elements are all the same constant or undefined.
-static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
- BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
- if (!C)
- return false;
-
- APInt SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- EVT EltVT = N->getValueType(0).getVectorElementType();
- return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
- HasAnyUndefs) &&
- EltVT.getSizeInBits() >= SplatBitSize);
-}
-
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2229,7 +2296,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
bool HiExists = N->hasAnyUseOfValue(1);
if (!HiExists &&
(!LegalOperations ||
- TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
+ TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
N->op_begin(), N->getNumOperands());
return CombineTo(N, Res, Res);
@@ -2454,35 +2521,66 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// The type-legalizer generates this pattern when loading illegal
// vector types from memory. In many cases this allows additional shuffle
// optimizations.
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
- N0.getOperand(1).getOpcode() == ISD::UNDEF &&
- N1.getOperand(1).getOpcode() == ISD::UNDEF) {
+ // There are other cases where moving the shuffle after the xor/and/or
+ // is profitable even if shuffles don't perform a swizzle.
+ // If both shuffles use the same mask, and both shuffles have the same first
+ // or second operand, then it might still be profitable to move the shuffle
+ // after the xor/and/or operation.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
- assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() &&
+ assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
"Inputs to shuffles are not the same type");
-
- unsigned NumElts = VT.getVectorNumElements();
-
+
// Check that both shuffles use the same mask. The masks are known to be of
// the same length because the result vector type is the same.
- bool SameMask = true;
- for (unsigned i = 0; i != NumElts; ++i) {
- int Idx0 = SVN0->getMaskElt(i);
- int Idx1 = SVN1->getMaskElt(i);
- if (Idx0 != Idx1) {
- SameMask = false;
- break;
+ // Check also that shuffles have only one use to avoid introducing extra
+ // instructions.
+ if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
+ SVN0->getMask().equals(SVN1->getMask())) {
+ SDValue ShOp = N0->getOperand(1);
+
+ // Don't try to fold this node if it requires introducing a
+ // build vector of all zeros that might be illegal at this stage.
+ if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
+ if (!LegalTypes)
+ ShOp = DAG.getConstant(0, VT);
+ else
+ ShOp = SDValue();
}
- }
- if (SameMask) {
- SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
- N0.getOperand(0), N1.getOperand(0));
- AddToWorkList(Op.getNode());
- return DAG.getVectorShuffle(VT, SDLoc(N), Op,
- DAG.getUNDEF(VT), &SVN0->getMask()[0]);
+ // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
+ // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C)
+ // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
+ if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
+ SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+ N0->getOperand(0), N1->getOperand(0));
+ AddToWorkList(NewNode.getNode());
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
+ &SVN0->getMask()[0]);
+ }
+
+ // Don't try to fold this node if it requires introducing a
+ // build vector of all zeros that might be illegal at this stage.
+ ShOp = N0->getOperand(0);
+ if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
+ if (!LegalTypes)
+ ShOp = DAG.getConstant(0, VT);
+ else
+ ShOp = SDValue();
+ }
+
+ // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
+ // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B))
+ // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
+ if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
+ SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+ N0->getOperand(1), N1->getOperand(1));
+ AddToWorkList(NewNode.getNode());
+ return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
+ &SVN0->getMask()[0]);
+ }
}
}
@@ -3151,6 +3249,60 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return N0;
if (ISD::isBuildVectorAllOnes(N1.getNode()))
return N1;
+
+ // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
+ // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
+ // Do this only if the resulting shuffle is legal.
+ if (isa<ShuffleVectorSDNode>(N0) &&
+ isa<ShuffleVectorSDNode>(N1) &&
+ N0->getOperand(1) == N1->getOperand(1) &&
+ ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
+ bool CanFold = true;
+ unsigned NumElts = VT.getVectorNumElements();
+ const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
+ const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
+ // We construct two shuffle masks:
+ // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand
+ // and N1 as the second operand.
+ // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand
+ // and N0 as the second operand.
+ // We do this because OR is commutable and therefore there might be
+ // two ways to fold this node into a shuffle.
+ SmallVector<int,4> Mask1;
+ SmallVector<int,4> Mask2;
+
+ for (unsigned i = 0; i != NumElts && CanFold; ++i) {
+ int M0 = SV0->getMaskElt(i);
+ int M1 = SV1->getMaskElt(i);
+
+ // Both shuffle indexes are undef. Propagate Undef.
+ if (M0 < 0 && M1 < 0) {
+ Mask1.push_back(M0);
+ Mask2.push_back(M0);
+ continue;
+ }
+
+ if (M0 < 0 || M1 < 0 ||
+ (M0 < (int)NumElts && M1 < (int)NumElts) ||
+ (M0 >= (int)NumElts && M1 >= (int)NumElts)) {
+ CanFold = false;
+ break;
+ }
+
+ Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
+ Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
+ }
+
+ if (CanFold) {
+ // Fold this sequence only if the resulting shuffle is 'legal'.
+ if (TLI.isShuffleMaskLegal(Mask1, VT))
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0),
+ N1->getOperand(0), &Mask1[0]);
+ if (TLI.isShuffleMaskLegal(Mask2, VT))
+ return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0),
+ N0->getOperand(0), &Mask2[0]);
+ }
+ }
}
// fold (or x, undef) -> -1
@@ -3192,11 +3344,14 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
- if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
+ if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
+ SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1);
+ if (!COR.getNode())
+ return SDValue();
return DAG.getNode(ISD::AND, SDLoc(N), VT,
DAG.getNode(ISD::OR, SDLoc(N0), VT,
- N0.getOperand(0), N1),
- DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+ N0.getOperand(0), N1), COR);
+ }
}
// fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
@@ -3302,6 +3457,155 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
return false;
}
+// Return true if we can prove that, whenever Neg and Pos are both in the
+// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that
+// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
+//
+// (or (shift1 X, Neg), (shift2 X, Pos))
+//
+// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
+// in direction shift1 by Neg. The range [0, OpSize) means that we only need
+// to consider shift amounts with defined behavior.
+static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) {
+ // If OpSize is a power of 2 then:
+ //
+ // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1)
+ // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize).
+ //
+ // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check
+ // for the stronger condition:
+ //
+ // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A]
+ //
+ // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1)
+ // we can just replace Neg with Neg' for the rest of the function.
+ //
+ // In other cases we check for the even stronger condition:
+ //
+ // Neg == OpSize - Pos [B]
+ //
+ // for all Neg and Pos. Note that the (or ...) then invokes undefined
+ // behavior if Pos == 0 (and consequently Neg == OpSize).
+ //
+ // We could actually use [A] whenever OpSize is a power of 2, but the
+ // only extra cases that it would match are those uninteresting ones
+ // where Neg and Pos are never in range at the same time. E.g. for
+ // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
+ // as well as (sub 32, Pos), but:
+ //
+ // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
+ //
+ // always invokes undefined behavior for 32-bit X.
+ //
+ // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise.
+ unsigned MaskLoBits = 0;
+ if (Neg.getOpcode() == ISD::AND &&
+ isPowerOf2_64(OpSize) &&
+ Neg.getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) {
+ Neg = Neg.getOperand(0);
+ MaskLoBits = Log2_64(OpSize);
+ }
+
+ // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
+ if (Neg.getOpcode() != ISD::SUB)
+ return 0;
+ ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0));
+ if (!NegC)
+ return 0;
+ SDValue NegOp1 = Neg.getOperand(1);
+
+ // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with
+ // Pos'. The truncation is redundant for the purpose of the equality.
+ if (MaskLoBits &&
+ Pos.getOpcode() == ISD::AND &&
+ Pos.getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1)
+ Pos = Pos.getOperand(0);
+
+ // The condition we need is now:
+ //
+ // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask
+ //
+ // If NegOp1 == Pos then we need:
+ //
+ // OpSize & Mask == NegC & Mask
+ //
+ // (because "x & Mask" is a truncation and distributes through subtraction).
+ APInt Width;
+ if (Pos == NegOp1)
+ Width = NegC->getAPIntValue();
+ // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
+ // Then the condition we want to prove becomes:
+ //
+ // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask
+ //
+ // which, again because "x & Mask" is a truncation, becomes:
+ //
+ // NegC & Mask == (OpSize - PosC) & Mask
+ // OpSize & Mask == (NegC + PosC) & Mask
+ else if (Pos.getOpcode() == ISD::ADD &&
+ Pos.getOperand(0) == NegOp1 &&
+ Pos.getOperand(1).getOpcode() == ISD::Constant)
+ Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() +
+ NegC->getAPIntValue());
+ else
+ return false;
+
+ // Now we just need to check that OpSize & Mask == Width & Mask.
+ if (MaskLoBits)
+ // Opsize & Mask is 0 since Mask is Opsize - 1.
+ return Width.getLoBits(MaskLoBits) == 0;
+ return Width == OpSize;
+}
+
+// A subroutine of MatchRotate used once we have found an OR of two opposite
+// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
+// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
+// former being preferred if supported. InnerPos and InnerNeg are Pos and
+// Neg with outer conversions stripped away.
+SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
+ SDValue Neg, SDValue InnerPos,
+ SDValue InnerNeg, unsigned PosOpcode,
+ unsigned NegOpcode, SDLoc DL) {
+ // fold (or (shl x, (*ext y)),
+ // (srl x, (*ext (sub 32, y)))) ->
+ // (rotl x, y) or (rotr x, (sub 32, y))
+ //
+ // fold (or (shl x, (*ext (sub 32, y))),
+ // (srl x, (*ext y))) ->
+ // (rotr x, y) or (rotl x, (sub 32, y))
+ EVT VT = Shifted.getValueType();
+ if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) {
+ bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
+ return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
+ HasPos ? Pos : Neg).getNode();
+ }
+
+ // fold (or (shl (*ext x), (*ext y)),
+ // (srl (*ext x), (*ext (sub 32, y)))) ->
+ // (*ext (rotl x, y)) or (*ext (rotr x, (sub 32, y)))
+ //
+ // fold (or (shl (*ext x), (*ext (sub 32, y))),
+ // (srl (*ext x), (*ext y))) ->
+ // (*ext (rotr x, y)) or (*ext (rotl x, (sub 32, y)))
+ if (Shifted.getOpcode() == ISD::ZERO_EXTEND ||
+ Shifted.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue InnerShifted = Shifted.getOperand(0);
+ EVT InnerVT = InnerShifted.getValueType();
+ bool HasPosInner = TLI.isOperationLegalOrCustom(PosOpcode, InnerVT);
+ if (HasPosInner || TLI.isOperationLegalOrCustom(NegOpcode, InnerVT)) {
+ if (matchRotateSub(InnerPos, InnerNeg, InnerVT.getSizeInBits())) {
+ SDValue V = DAG.getNode(HasPosInner ? PosOpcode : NegOpcode, DL,
+ InnerVT, InnerShifted, HasPosInner ? Pos : Neg);
+ return DAG.getNode(Shifted.getOpcode(), DL, VT, V).getNode();
+ }
+ }
+ }
+
+ return 0;
+}
+
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
@@ -3342,6 +3646,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
unsigned OpSizeInBits = VT.getSizeInBits();
SDValue LHSShiftArg = LHSShift.getOperand(0);
SDValue LHSShiftAmt = LHSShift.getOperand(1);
+ SDValue RHSShiftArg = RHSShift.getOperand(0);
SDValue RHSShiftAmt = RHSShift.getOperand(1);
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
@@ -3395,28 +3700,15 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
RExtOp0 = RHSShiftAmt.getOperand(0);
}
- if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) {
- // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
- // (rotl x, y)
- // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
- // (rotr x, (sub 32, y))
- if (ConstantSDNode *SUBC =
- dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0)))
- if (SUBC->getAPIntValue() == OpSizeInBits)
- return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
- HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
- } else if (LExtOp0.getOpcode() == ISD::SUB &&
- RExtOp0 == LExtOp0.getOperand(1)) {
- // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
- // (rotr x, y)
- // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
- // (rotl x, (sub 32, y))
- if (ConstantSDNode *SUBC =
- dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0)))
- if (SUBC->getAPIntValue() == OpSizeInBits)
- return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
- HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
- }
+ SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
+ LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
+ if (TryL)
+ return TryL;
+
+ SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
+ RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
+ if (TryR)
+ return TryR;
return 0;
}
@@ -3559,7 +3851,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
/// visitShiftByConstant - Handle transforms common to the three shifts, when
/// the shift amount is a constant.
-SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
+ // We can't and shouldn't fold opaque constants.
+ if (Amt->isOpaque())
+ return SDValue();
+
SDNode *LHS = N->getOperand(0).getNode();
if (!LHS->hasOneUse()) return SDValue();
@@ -3585,9 +3881,9 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
break;
}
- // We require the RHS of the binop to be a constant as well.
+ // We require the RHS of the binop to be a constant and not opaque as well.
ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
- if (!BinOpCst) return SDValue();
+ if (!BinOpCst || BinOpCst->isOpaque()) return SDValue();
// FIXME: disable this unless the input to the binop is a shift by a constant.
// If it is not a shift, it pessimizes some common cases like:
@@ -3617,6 +3913,7 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
N->getValueType(0),
LHS->getOperand(1), N->getOperand(1));
+ assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
// Create the new shift.
SDValue NewShift = DAG.getNode(N->getOpcode(),
@@ -3627,18 +3924,74 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}
+SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
+ assert(N->getOpcode() == ISD::TRUNCATE);
+ assert(N->getOperand(0).getOpcode() == ISD::AND);
+
+ // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
+ if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
+ SDValue N01 = N->getOperand(0).getOperand(1);
+
+ if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
+ EVT TruncVT = N->getValueType(0);
+ SDValue N00 = N->getOperand(0).getOperand(0);
+ APInt TruncC = N01C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
+
+ return DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00),
+ DAG.getConstant(TruncC, TruncVT));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitRotate(SDNode *N) {
+ // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
+ if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
+ N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
+ SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode());
+ if (NewOp1.getNode())
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ N->getOperand(0), NewOp1);
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
- unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
+ // If setcc produces all-one true value then:
+ // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
+ if (N1CV && N1CV->isConstant()) {
+ if (N0.getOpcode() == ISD::AND &&
+ TLI.getBooleanContents(true) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
+
+ if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC) {
+ SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV);
+ if (C.getNode())
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
+ }
+ } else {
+ N1C = isConstOrConstSplat(N1);
+ }
+ }
}
// fold (shl c1, c2) -> c1<<c2
@@ -3662,35 +4015,25 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getConstant(0, VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
- N1.getOperand(0).getOpcode() == ISD::AND &&
- N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
- SDValue N101 = N1.getOperand(0).getOperand(1);
- if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
- EVT TruncVT = N1.getValueType();
- SDValue N100 = N1.getOperand(0).getOperand(0);
- APInt TruncC = N101C->getAPIntValue();
- TruncC = TruncC.trunc(TruncVT.getSizeInBits());
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
- DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
- DAG.getNode(ISD::TRUNCATE,
- SDLoc(N),
- TruncVT, N100),
- DAG.getConstant(TruncC, TruncVT)));
- }
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
+ if (NewOp1.getNode())
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
}
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
- if (N1C && N0.getOpcode() == ISD::SHL &&
- N0.getOperand(1).getOpcode() == ISD::Constant) {
- uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ if (N1C && N0.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t c1 = N0C1->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
}
// fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
@@ -3701,20 +4044,21 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND) &&
- N0.getOperand(0).getOpcode() == ISD::SHL &&
- isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
- uint64_t c1 =
- cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- EVT InnerShiftVT = N0.getOperand(0).getValueType();
- uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
- if (c2 >= OpSizeInBits - InnerShiftSize) {
- if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
- DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
- N0.getOperand(0)->getOperand(0)),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ N0.getOperand(0).getOpcode() == ISD::SHL) {
+ SDValue N0Op0 = N0.getOperand(0);
+ if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
+ uint64_t c1 = N0Op0C1->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0Op0.getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
+ if (c2 >= OpSizeInBits - InnerShiftSize) {
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
+ DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
+ N0Op0->getOperand(0)),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
}
}
@@ -3722,19 +4066,20 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// Only fold this if the inner zext has no other uses to avoid increasing
// the total number of instructions.
if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
- N0.getOperand(0).getOpcode() == ISD::SRL &&
- isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
- uint64_t c1 =
- cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
- if (c1 < VT.getSizeInBits()) {
- uint64_t c2 = N1C->getZExtValue();
- if (c1 == c2) {
- SDValue NewOp0 = N0.getOperand(0);
- EVT CountVT = NewOp0.getOperand(1).getValueType();
- SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
- NewOp0, DAG.getConstant(c2, CountVT));
- AddToWorkList(NewSHL.getNode());
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+ N0.getOperand(0).getOpcode() == ISD::SRL) {
+ SDValue N0Op0 = N0.getOperand(0);
+ if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
+ uint64_t c1 = N0Op0C1->getZExtValue();
+ if (c1 < VT.getScalarSizeInBits()) {
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 == c2) {
+ SDValue NewOp0 = N0.getOperand(0);
+ EVT CountVT = NewOp0.getOperand(1).getValueType();
+ SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
+ NewOp0, DAG.getConstant(c2, CountVT));
+ AddToWorkList(NewSHL.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+ }
}
}
}
@@ -3743,40 +4088,39 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// (and (srl x, (sub c1, c2), MASK)
// Only fold this if the inner shift has no other uses -- if it does, folding
// this will increase the total number of instructions.
- if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
- N0.getOperand(1).getOpcode() == ISD::Constant) {
- uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- if (c1 < VT.getSizeInBits()) {
- uint64_t c2 = N1C->getZExtValue();
- APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
- VT.getSizeInBits() - c1);
- SDValue Shift;
- if (c2 > c1) {
- Mask = Mask.shl(c2-c1);
- Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c2-c1, N1.getValueType()));
- } else {
- Mask = Mask.lshr(c1-c2);
- Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1-c2, N1.getValueType()));
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t c1 = N0C1->getZExtValue();
+ if (c1 < OpSizeInBits) {
+ uint64_t c2 = N1C->getZExtValue();
+ APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
+ SDValue Shift;
+ if (c2 > c1) {
+ Mask = Mask.shl(c2 - c1);
+ Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(c2 - c1, N1.getValueType()));
+ } else {
+ Mask = Mask.lshr(c1 - c2);
+ Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(c1 - c2, N1.getValueType()));
+ }
+ return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
+ DAG.getConstant(Mask, VT));
}
- return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
- DAG.getConstant(Mask, VT));
}
}
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+ unsigned BitSize = VT.getScalarSizeInBits();
SDValue HiBitsMask =
- DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
- VT.getSizeInBits() -
- N1C->getZExtValue()),
- VT);
+ DAG.getConstant(APInt::getHighBitsSet(BitSize,
+ BitSize - N1C->getZExtValue()), VT);
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
HiBitsMask);
}
if (N1C) {
- SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
+ SDValue NewSHL = visitShiftByConstant(N, N1C);
if (NewSHL.getNode())
return NewSHL;
}
@@ -3796,6 +4140,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ N1C = isConstOrConstSplat(N1);
}
// fold (sra c1, c2) -> (sra c1, c2)
@@ -3829,11 +4175,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SRA) {
- if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
- if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
+ if (Sum >= OpSizeInBits)
+ Sum = OpSizeInBits - 1;
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(Sum, N1C->getValueType(0)));
+ DAG.getConstant(Sum, N1.getValueType()));
}
}
@@ -3842,14 +4189,17 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// result_size - n != m.
// If truncate is free for the target sext(shl) is likely to result in better
// code.
- if (N0.getOpcode() == ISD::SHL) {
+ if (N0.getOpcode() == ISD::SHL && N1C) {
// Get the two constanst of the shifts, CN0 = m, CN = n.
- const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (N01C && N1C) {
+ const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
+ if (N01C) {
+ LLVMContext &Ctx = *DAG.getContext();
// Determine what the truncate's result bitsize and type would be.
- EVT TruncVT =
- EVT::getIntegerVT(*DAG.getContext(),
- OpSizeInBits - N1C->getZExtValue());
+ EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
+
+ if (VT.isVector())
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+
// Determine the residual right-shift amount.
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
@@ -3876,44 +4226,33 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
- N1.getOperand(0).getOpcode() == ISD::AND &&
- N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
- SDValue N101 = N1.getOperand(0).getOperand(1);
- if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
- EVT TruncVT = N1.getValueType();
- SDValue N100 = N1.getOperand(0).getOperand(0);
- APInt TruncC = N101C->getAPIntValue();
- TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
- return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
- DAG.getNode(ISD::AND, SDLoc(N),
- TruncVT,
- DAG.getNode(ISD::TRUNCATE,
- SDLoc(N),
- TruncVT, N100),
- DAG.getConstant(TruncC, TruncVT)));
- }
- }
-
- // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2))
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
+ if (NewOp1.getNode())
+ return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
+ }
+
+ // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
// if c1 is equal to the number of bits the trunc removes
if (N0.getOpcode() == ISD::TRUNCATE &&
(N0.getOperand(0).getOpcode() == ISD::SRL ||
N0.getOperand(0).getOpcode() == ISD::SRA) &&
N0.getOperand(0).hasOneUse() &&
N0.getOperand(0).getOperand(1).hasOneUse() &&
- N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
- EVT LargeVT = N0.getOperand(0).getValueType();
- ConstantSDNode *LargeShiftAmt =
- cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
-
- if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
- LargeShiftAmt->getZExtValue()) {
- SDValue Amt =
- DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
- getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
- SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
- N0.getOperand(0).getOperand(0), Amt);
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
+ N1C) {
+ SDValue N0Op0 = N0.getOperand(0);
+ if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
+ unsigned LargeShiftVal = LargeShift->getZExtValue();
+ EVT LargeVT = N0Op0.getValueType();
+
+ if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
+ SDValue Amt =
+ DAG.getConstant(LargeShiftVal + N1C->getZExtValue(),
+ getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
+ SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
+ N0Op0.getOperand(0), Amt);
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
+ }
}
}
@@ -3927,7 +4266,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
if (N1C) {
- SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
+ SDValue NewSRA = visitShiftByConstant(N, N1C);
if (NewSRA.getNode())
return NewSRA;
}
@@ -3947,6 +4286,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ N1C = isConstOrConstSplat(N1);
}
// fold (srl c1, c2) -> c1 >>u c2
@@ -3967,14 +4308,15 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getConstant(0, VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
- if (N1C && N0.getOpcode() == ISD::SRL &&
- N0.getOperand(1).getOpcode() == ISD::Constant) {
- uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ if (N1C && N0.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t c1 = N01C->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
}
// fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
@@ -3999,18 +4341,21 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
// fold (srl (shl x, c), c) -> (and x, cst2)
- if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
- N0.getValueSizeInBits() <= 64) {
- uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(~0ULL >> ShAmt, VT));
+ if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
+ unsigned BitSize = N0.getScalarValueSizeInBits();
+ if (BitSize <= 64) {
+ uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL >> ShAmt, VT));
+ }
}
// fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
// Shifting in all undef bits?
EVT SmallVT = N0.getOperand(0).getValueType();
- if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
+ unsigned BitSize = SmallVT.getScalarSizeInBits();
+ if (N1C->getZExtValue() >= BitSize)
return DAG.getUNDEF(VT);
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
@@ -4019,7 +4364,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
N0.getOperand(0),
DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
AddToWorkList(SmallShift.getNode());
- APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt);
+ APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
return DAG.getNode(ISD::AND, SDLoc(N), VT,
DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
DAG.getConstant(Mask, VT));
@@ -4028,14 +4373,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
// bit, which is unmodified by sra.
- if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+ if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
if (N0.getOpcode() == ISD::SRA)
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
}
// fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
if (N1C && N0.getOpcode() == ISD::CTLZ &&
- N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+ N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
APInt KnownZero, KnownOne;
DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
@@ -4070,22 +4415,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
- N1.getOperand(0).getOpcode() == ISD::AND &&
- N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
- SDValue N101 = N1.getOperand(0).getOperand(1);
- if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
- EVT TruncVT = N1.getValueType();
- SDValue N100 = N1.getOperand(0).getOperand(0);
- APInt TruncC = N101C->getAPIntValue();
- TruncC = TruncC.trunc(TruncVT.getSizeInBits());
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
- DAG.getNode(ISD::AND, SDLoc(N),
- TruncVT,
- DAG.getNode(ISD::TRUNCATE,
- SDLoc(N),
- TruncVT, N100),
- DAG.getConstant(TruncC, TruncVT)));
- }
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
+ if (NewOp1.getNode())
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
}
// fold operands of srl based on knowledge that the low bits are not
@@ -4094,7 +4427,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue(N, 0);
if (N1C) {
- SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+ SDValue NewSRL = visitShiftByConstant(N, N1C);
if (NewSRL.getNode())
return NewSRL;
}
@@ -4275,12 +4608,12 @@ static
std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
EVT LoVT, HiVT;
- llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
// Split the inputs.
SDValue Lo, Hi, LL, LH, RL, RH;
- llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
- llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
@@ -4338,9 +4671,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
return SDValue();
SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
- llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
- llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
- llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
+ std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
@@ -4353,6 +4686,13 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
+ // Fold (vselect (build_vector all_ones), N1, N2) -> N1
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ return N1;
+ // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N2;
+
return SDValue();
}
@@ -4402,6 +4742,65 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
SDLoc(N));
}
+// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext
+// dag node into a ConstantSDNode or a build_vector of constants.
+// This function is called by the DAGCombiner when visiting sext/zext/aext
+// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
+// Vector extends are not folded if operations are legal; this is to
+// avoid introducing illegal build_vector dag nodes.
+static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
+ SelectionDAG &DAG, bool LegalTypes,
+ bool LegalOperations) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
+ Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!");
+
+ // fold (sext c1) -> c1
+ // fold (zext c1) -> c1
+ // fold (aext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
+
+ // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
+ // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
+ // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
+ EVT SVT = VT.getScalarType();
+ if (!(VT.isVector() &&
+ (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
+ ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
+ return 0;
+
+ // We can fold this node into a build_vector.
+ unsigned VTBits = SVT.getSizeInBits();
+ unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
+ unsigned ShAmt = VTBits - EVTBits;
+ SmallVector<SDValue, 8> Elts;
+ unsigned NumElts = N0->getNumOperands();
+ SDLoc DL(N);
+
+ for (unsigned i=0; i != NumElts; ++i) {
+ SDValue Op = N0->getOperand(i);
+ if (Op->getOpcode() == ISD::UNDEF) {
+ Elts.push_back(DAG.getUNDEF(SVT));
+ continue;
+ }
+
+ ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
+ const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
+ if (Opcode == ISD::SIGN_EXTEND)
+ Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
+ SVT));
+ else
+ Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(),
+ SVT));
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], NumElts).getNode();
+}
+
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extension are possible and the above
@@ -4492,9 +4891,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- // fold (sext c1) -> c1
- if (isa<ConstantSDNode>(N0))
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0);
+ if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+ LegalOperations))
+ return SDValue(Res, 0);
// fold (sext (sext x)) -> (sext x)
// fold (sext (aext x)) -> (sext x)
@@ -4671,7 +5070,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
}
- // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+ // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
unsigned ElementWidth = VT.getScalarType().getSizeInBits();
SDValue NegOne =
DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
@@ -4680,15 +5079,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
NegOne, DAG.getConstant(0, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
- if (!VT.isVector() &&
- (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) {
- return DAG.getSelect(SDLoc(N), VT,
- DAG.getSetCC(SDLoc(N),
- getSetCCResultType(VT),
- N0.getOperand(0), N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get()),
- NegOne, DAG.getConstant(0, VT));
+
+ if (!VT.isVector()) {
+ EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
+ SDLoc DL(N);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ SDValue SetCC = DAG.getSetCC(DL,
+ SetCCVT,
+ N0.getOperand(0), N0.getOperand(1), CC);
+ EVT SelectVT = getSetCCResultType(VT);
+ return DAG.getSelect(DL, VT,
+ DAG.getSExtOrTrunc(SetCC, DL, SelectVT),
+ NegOne, DAG.getConstant(0, VT));
+
+ }
}
}
@@ -4742,9 +5147,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- // fold (zext c1) -> c1
- if (isa<ConstantSDNode>(N0))
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
+ if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+ LegalOperations))
+ return SDValue(Res, 0);
+
// fold (zext (zext x)) -> (zext x)
// fold (zext (aext x)) -> (zext x)
if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
@@ -4925,10 +5331,14 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
if (N0.getOpcode() == ISD::SETCC) {
- if (!LegalOperations && VT.isVector()) {
+ if (!LegalOperations && VT.isVector() &&
+ N0.getValueType().getVectorElementType() == MVT::i1) {
+ EVT N0VT = N0.getOperand(0).getValueType();
+ if (getSetCCResultType(N0VT) == N0.getValueType())
+ return SDValue();
+
// zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
// Only do this before legalize for now.
- EVT N0VT = N0.getOperand(0).getValueType();
EVT EltVT = VT.getVectorElementType();
SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
DAG.getConstant(1, EltVT));
@@ -5007,9 +5417,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- // fold (aext c1) -> c1
- if (isa<ConstantSDNode>(N0))
- return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0);
+ if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+ LegalOperations))
+ return SDValue(Res, 0);
+
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
// fold (aext (sext x)) -> (sext x)
@@ -5466,6 +5877,29 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
BSwap, N1);
}
+ // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
+ // into a build_vector.
+ if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+ SmallVector<SDValue, 8> Elts;
+ unsigned NumElts = N0->getNumOperands();
+ unsigned ShAmt = VTBits - EVTBits;
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Op = N0->getOperand(i);
+ if (Op->getOpcode() == ISD::UNDEF) {
+ Elts.push_back(Op);
+ continue;
+ }
+
+ ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
+ const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
+ Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
+ Op.getValueType()));
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Elts[0], NumElts);
+ }
+
return SDValue();
}
@@ -5510,7 +5944,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// creates this pattern) and before operation legalization after which
// we need to be more careful about the vector instructions that we generate.
if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- LegalTypes && !LegalOperations && N0->hasOneUse()) {
+ LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
EVT VecTy = N0.getOperand(0).getValueType();
EVT ExTy = N0.getValueType();
@@ -5587,6 +6021,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue Reduced = ReduceLoadWidth(N);
if (Reduced.getNode())
return Reduced;
+ // Handle the case where the load remains an extending load even
+ // after truncation.
+ if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ if (!LN0->isVolatile() &&
+ LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
+ SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getMemoryVT(),
+ LN0->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
+ return NewLoad;
+ }
+ }
}
// fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
// where ... are all 'undef'.
@@ -5654,8 +6102,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
- LD1->getPointerInfo().getAddrSpace() !=
- LD2->getPointerInfo().getAddrSpace())
+ LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
EVT LD1VT = LD1->getValueType(0);
@@ -5691,14 +6138,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (!LegalTypes &&
N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
VT.isVector()) {
- bool isSimple = true;
- for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
- if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
- N0.getOperand(i).getOpcode() != ISD::Constant &&
- N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
- isSimple = false;
- break;
- }
+ bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
EVT DestEltVT = N->getValueType(0).getVectorElementType();
assert(!DestEltVT.isVector() &&
@@ -6551,7 +6991,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
}
- // The next optimizations are desireable only if SELECT_CC can be lowered.
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
// Check against MVT::Other for SELECT_CC, which is a workaround for targets
// having to say they don't support SELECT_CC on every type the DAG knows
// about, since there is no way to mark an opcode illegal at all value types
@@ -6608,7 +7048,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
}
- // The next optimizations are desireable only if SELECT_CC can be lowered.
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
// Check against MVT::Other for SELECT_CC, which is a workaround for targets
// having to say they don't support SELECT_CC on every type the DAG knows
// about, since there is no way to mark an opcode illegal at all value types
@@ -7537,7 +7977,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
- if (UseAA) {
+#ifndef NDEBUG
+ if (CombinerAAOnlyFunc.getNumOccurrences() &&
+ CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
+ UseAA = false;
+#endif
+ if (UseAA && LD->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
@@ -7888,14 +8333,6 @@ struct LoadedSlice {
};
}
-/// \brief Sorts LoadedSlice according to their offset.
-struct LoadedSliceSorter {
- bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) {
- assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
- return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
- }
-};
-
/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
@@ -7939,7 +8376,11 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
// Sort the slices so that elements that are likely to be next to each
// other in memory are next to each other in the list.
- std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter());
+ std::sort(LoadedSlices.begin(), LoadedSlices.end(),
+ [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
+ assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
+ return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
+ });
const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
// First (resp. Second) is the first (resp. Second) potentially candidate
// to be placed in a paired load.
@@ -8075,8 +8516,8 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
// The width of the type must be a power of 2 and greater than 8-bits.
// Otherwise the load cannot be represented in LLVM IR.
- // Moreover, if we shifted with a non 8-bits multiple, the slice
- // will be accross several bytes. We do not support that.
+ // Moreover, if we shifted with a non-8-bits multiple, the slice
+ // will be across several bytes. We do not support that.
unsigned Width = User->getValueSizeInBits(0);
if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
return 0;
@@ -8543,14 +8984,6 @@ struct MemOpLink {
unsigned SequenceNum;
};
-/// Sorts store nodes in a link according to their offset from a shared
-// base ptr.
-struct ConsecutiveMemoryChainSorter {
- bool operator()(MemOpLink LHS, MemOpLink RHS) {
- return LHS.OffsetFromBase < RHS.OffsetFromBase;
- }
-};
-
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
EVT MemVT = St->getMemoryVT();
int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
@@ -8669,7 +9102,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Sort the memory operands according to their distance from the base pointer.
std::sort(StoreNodes.begin(), StoreNodes.end(),
- ConsecutiveMemoryChainSorter());
+ [](MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase ||
+ (LHS.OffsetFromBase == RHS.OffsetFromBase &&
+ LHS.SequenceNum > RHS.SequenceNum);
+ });
// Scan the memory operations on the chain and find the first non-consecutive
// store memory address.
@@ -8717,7 +9154,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
} else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
NonZero |= !C->getConstantFPValue()->isNullValue();
} else {
- // Non constant.
+ // Non-constant.
break;
}
@@ -9125,7 +9562,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
- if (UseAA) {
+#ifndef NDEBUG
+ if (CombinerAAOnlyFunc.getNumOccurrences() &&
+ CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
+ UseAA = false;
+#endif
+ if (UseAA && ST->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
@@ -9306,9 +9748,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// We only perform this optimization before the op legalization phase because
// we may introduce new vector instructions which are not backed by TD
// patterns. For example on AVX, extracting elements from a wide vector
- // without using extract_subvector.
+ // without using extract_subvector. However, if we can find an underlying
+ // scalar value, then we can always use that.
if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
- && ConstEltNo && !LegalOperations) {
+ && ConstEltNo) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
int NumElem = VT.getVectorNumElements();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
@@ -9320,16 +9763,32 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
return DAG.getUNDEF(NVT);
// Select the right vector half to extract from.
+ SDValue SVInVec;
if (OrigElt < NumElem) {
- InVec = InVec->getOperand(0);
+ SVInVec = InVec->getOperand(0);
} else {
- InVec = InVec->getOperand(1);
+ SVInVec = InVec->getOperand(1);
OrigElt -= NumElem;
}
- EVT IndexTy = TLI.getVectorIdxTy();
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
- InVec, DAG.getConstant(OrigElt, IndexTy));
+ if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
+ SDValue InOp = SVInVec.getOperand(OrigElt);
+ if (InOp.getValueType() != NVT) {
+ assert(InOp.getValueType().isInteger() && NVT.isInteger());
+ InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
+ }
+
+ return InOp;
+ }
+
+ // FIXME: We should handle recursing on other vector shuffles and
+ // scalar_to_vector here as well.
+
+ if (!LegalOperations) {
+ EVT IndexTy = TLI.getVectorIdxTy();
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
+ SVInVec, DAG.getConstant(OrigElt, IndexTy));
+ }
}
// Perform only after legalization to ensure build_vector / vector_shuffle
@@ -9836,6 +10295,26 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
}
}
+ // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
+ // -> (BUILD_VECTOR A, B, ..., C, D, ...)
+ if (N->getNumOperands() == 2 &&
+ N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+ N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SmallVector<SDValue, 8> Opnds;
+ unsigned BuildVecNumElts = N0.getNumOperands();
+
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(N0.getOperand(i));
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(N1.getOperand(i));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0],
+ Opnds.size());
+ }
+
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes.
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
@@ -10142,6 +10621,33 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N2 = N->getOperand(2);
+
+ // If the input vector is a concatenation, and the insert replaces
+ // one of the halves, we can optimize into a single concat_vectors.
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
+ N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) {
+ APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
+ EVT VT = N->getValueType(0);
+
+ // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
+ // (concat_vectors Z, Y)
+ if (InsIdx == 0)
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
+ N->getOperand(1), N0.getOperand(1));
+
+ // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
+ // (concat_vectors X, Z)
+ if (InsIdx == VT.getVectorNumElements()/2)
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
+ N0.getOperand(0), N->getOperand(1));
+ }
+
+ return SDValue();
+}
+
/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform
/// an AND to a vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -10204,18 +10710,15 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
// this operation.
if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ // Check if both vectors are constants. If not bail out.
+ if (!(cast<BuildVectorSDNode>(LHS)->isConstant() &&
+ cast<BuildVectorSDNode>(RHS)->isConstant()))
+ return SDValue();
+
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
SDValue LHSOp = LHS.getOperand(i);
SDValue RHSOp = RHS.getOperand(i);
- // If these two elements can't be folded, bail out.
- if ((LHSOp.getOpcode() != ISD::UNDEF &&
- LHSOp.getOpcode() != ISD::Constant &&
- LHSOp.getOpcode() != ISD::ConstantFP) ||
- (RHSOp.getOpcode() != ISD::UNDEF &&
- RHSOp.getOpcode() != ISD::Constant &&
- RHSOp.getOpcode() != ISD::ConstantFP))
- break;
// Can't fold divide by zero.
if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
@@ -10862,14 +11365,21 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA :
TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+#ifndef NDEBUG
+ if (CombinerAAOnlyFunc.getNumOccurrences() &&
+ CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
+ UseAA = false;
+#endif
if (UseAA && SrcValue1 && SrcValue2) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
AliasAnalysis::AliasResult AAResult =
- AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
- AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
+ AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1,
+ UseTBAA ? TBAAInfo1 : 0),
+ AliasAnalysis::Location(SrcValue2, Overlap2,
+ UseTBAA ? TBAAInfo2 : 0));
if (AAResult == AliasAnalysis::NoAlias)
return false;
}
@@ -10956,7 +11466,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
if (Depth > 6 || Aliases.size() == 2) {
Aliases.clear();
Aliases.push_back(OriginalChain);
- break;
+ return;
}
// Don't bother if we've been before.
@@ -11018,6 +11528,63 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
break;
}
}
+
+ // We need to be careful here to also search for aliases through the
+ // value operand of a store, etc. Consider the following situation:
+ // Token1 = ...
+ // L1 = load Token1, %52
+ // S1 = store Token1, L1, %51
+ // L2 = load Token1, %52+8
+ // S2 = store Token1, L2, %51+8
+ // Token2 = Token(S1, S2)
+ // L3 = load Token2, %53
+ // S3 = store Token2, L3, %52
+ // L4 = load Token2, %53+8
+ // S4 = store Token2, L4, %52+8
+ // If we search for aliases of S3 (which loads address %52), and we look
+ // only through the chain, then we'll miss the trivial dependence on L1
+ // (which also loads from %52). We then might change all loads and
+ // stores to use Token1 as their chain operand, which could result in
+ // copying %53 into %52 before copying %52 into %51 (which should
+ // happen first).
+ //
+ // The problem is, however, that searching for such data dependencies
+ // can become expensive, and the cost is not directly related to the
+ // chain depth. Instead, we'll rule out such configurations here by
+ // insisting that we've visited all chain users (except for users
+ // of the original chain, which is not necessary). When doing this,
+ // we need to look through nodes we don't care about (otherwise, things
+ // like register copies will interfere with trivial cases).
+
+ SmallVector<const SDNode *, 16> Worklist;
+ for (SmallPtrSet<SDNode *, 16>::iterator I = Visited.begin(),
+ IE = Visited.end(); I != IE; ++I)
+ if (*I != OriginalChain.getNode())
+ Worklist.push_back(*I);
+
+ while (!Worklist.empty()) {
+ const SDNode *M = Worklist.pop_back_val();
+
+ // We have already visited M, and want to make sure we've visited any uses
+ // of M that we care about. For uses that we've not visisted, and don't
+ // care about, queue them to the worklist.
+
+ for (SDNode::use_iterator UI = M->use_begin(),
+ UIE = M->use_end(); UI != UIE; ++UI)
+ if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI)) {
+ if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) {
+ // We've not visited this use, and we care about it (it could have an
+ // ordering dependency with the original node).
+ Aliases.clear();
+ Aliases.push_back(OriginalChain);
+ return;
+ }
+
+ // We've not visited this use, but we don't care about it. Mark it as
+ // visited and enqueue it to the worklist.
+ Worklist.push_back(*UI);
+ }
+ }
}
/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index a6f746140d..baba51eaf2 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -49,8 +49,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DebugInfo.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
@@ -118,7 +118,7 @@ bool FastISel::hasTrivialKill(const Value *V) const {
// No-op casts are trivially coalesced by fast-isel.
if (const CastInst *Cast = dyn_cast<CastInst>(I))
- if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) &&
+ if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) &&
!hasTrivialKill(Cast->getOperand(0)))
return false;
@@ -133,7 +133,7 @@ bool FastISel::hasTrivialKill(const Value *V) const {
!(I->getOpcode() == Instruction::BitCast ||
I->getOpcode() == Instruction::PtrToInt ||
I->getOpcode() == Instruction::IntToPtr) &&
- cast<Instruction>(*I->use_begin())->getParent() == I->getParent();
+ cast<Instruction>(*I->user_begin())->getParent() == I->getParent();
}
unsigned FastISel::getRegForValue(const Value *V) {
@@ -192,7 +192,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
// Translate this as an integer zero so that it can be
// local-CSE'd with actual integer zeros.
Reg =
- getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
+ getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getContext())));
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
if (CF->isNullValue()) {
Reg = TargetMaterializeFloatZero(CF);
@@ -229,7 +229,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
Reg = lookUpRegForValue(Op);
} else if (isa<UndefValue>(V)) {
Reg = createResultReg(TLI.getRegClassFor(VT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
}
@@ -335,20 +335,20 @@ void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
FastISel::SavePoint FastISel::enterLocalValueArea() {
MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
- DebugLoc OldDL = DL;
+ DebugLoc OldDL = DbgLoc;
recomputeInsertPt();
- DL = DebugLoc();
+ DbgLoc = DebugLoc();
SavePoint SP = { OldInsertPt, OldDL };
return SP;
}
void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
- LastLocalValue = llvm::prior(FuncInfo.InsertPt);
+ LastLocalValue = std::prev(FuncInfo.InsertPt);
// Restore the previous insert position.
FuncInfo.InsertPt = OldInsertPt.InsertPt;
- DL = OldInsertPt.DL;
+ DbgLoc = OldInsertPt.DL;
}
/// SelectBinaryOp - Select and emit code for a binary operator instruction,
@@ -484,7 +484,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
- TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field);
+ TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
if (TotalOffs >= MaxOffs) {
N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
if (N == 0)
@@ -503,7 +503,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
if (CI->isZero()) continue;
// N = N + Offset
TotalOffs +=
- TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ DL.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
if (TotalOffs >= MaxOffs) {
N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
if (N == 0)
@@ -524,7 +524,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
}
// N = N + Idx * ElementSize;
- uint64_t ElementSize = TD.getTypeAllocSize(Ty);
+ uint64_t ElementSize = DL.getTypeAllocSize(Ty);
std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
unsigned IdxN = Pair.first;
bool IdxNIsKill = Pair.second;
@@ -572,7 +572,7 @@ bool FastISel::SelectCall(const User *I) {
if (IA->isAlignStack())
ExtraInfo |= InlineAsm::Extra_IsAlignStack;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::INLINEASM))
.addExternalSymbol(IA->getAsmString().c_str())
.addImm(ExtraInfo);
@@ -643,11 +643,11 @@ bool FastISel::SelectCall(const User *I) {
if (Op) {
if (Op->isReg()) {
Op->setIsDebug(true);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0,
DI->getVariable());
} else
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::DBG_VALUE))
.addOperand(*Op)
.addImm(0)
@@ -667,26 +667,26 @@ bool FastISel::SelectCall(const User *I) {
if (!V) {
// Currently the optimizer can produce this; insert an undef to
// help debugging. Probably the optimizer should not do this.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addReg(0U).addImm(DI->getOffset())
.addMetadata(DI->getVariable());
} else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getBitWidth() > 64)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addCImm(CI).addImm(DI->getOffset())
.addMetadata(DI->getVariable());
else
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addImm(CI->getZExtValue()).addImm(DI->getOffset())
.addMetadata(DI->getVariable());
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addFPImm(CF).addImm(DI->getOffset())
.addMetadata(DI->getVariable());
} else if (unsigned Reg = lookUpRegForValue(V)) {
// FIXME: This does not handle register-indirect values at offset 0.
bool IsIndirect = DI->getOffset() != 0;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect,
Reg, DI->getOffset(), DI->getVariable());
} else {
// We can't yet handle anything else here because it would require
@@ -798,8 +798,8 @@ bool FastISel::SelectBitCast(const User *I) {
// Don't attempt a cross-class copy. It will likely fail.
if (SrcClass == DstClass) {
ResultReg = createResultReg(DstClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(Op0);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0);
}
}
@@ -822,7 +822,7 @@ FastISel::SelectInstruction(const Instruction *I) {
if (!HandlePHINodesInSuccessorBlocks(I->getParent()))
return false;
- DL = I->getDebugLoc();
+ DbgLoc = I->getDebugLoc();
MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
@@ -840,7 +840,7 @@ FastISel::SelectInstruction(const Instruction *I) {
// First, try doing target-independent selection.
if (SelectOperator(I, I->getOpcode())) {
++NumFastIselSuccessIndependent;
- DL = DebugLoc();
+ DbgLoc = DebugLoc();
return true;
}
// Remove dead code. However, ignore call instructions since we've flushed
@@ -855,7 +855,7 @@ FastISel::SelectInstruction(const Instruction *I) {
SavedInsertPt = FuncInfo.InsertPt;
if (TargetSelectInstruction(I)) {
++NumFastIselSuccessTarget;
- DL = DebugLoc();
+ DbgLoc = DebugLoc();
return true;
}
// Check for dead code and remove as necessary.
@@ -863,7 +863,7 @@ FastISel::SelectInstruction(const Instruction *I) {
if (SavedInsertPt != FuncInfo.InsertPt)
removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
- DL = DebugLoc();
+ DbgLoc = DebugLoc();
return false;
}
@@ -871,7 +871,7 @@ FastISel::SelectInstruction(const Instruction *I) {
/// unless it is the immediate (fall-through) successor, and update
/// the CFG.
void
-FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
@@ -881,7 +881,7 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
} else {
// The unconditional branch case.
TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL,
- SmallVector<MachineOperand, 0>(), DL);
+ SmallVector<MachineOperand, 0>(), DbgLoc);
}
FuncInfo.MBB->addSuccessor(MSucc);
}
@@ -1096,7 +1096,7 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo,
MFI(*FuncInfo.MF->getFrameInfo()),
MCP(*FuncInfo.MF->getConstantPool()),
TM(FuncInfo.MF->getTarget()),
- TD(*TM.getDataLayout()),
+ DL(*TM.getDataLayout()),
TII(*TM.getInstrInfo()),
TLI(*TM.getTargetLowering()),
TRI(*TM.getRegisterInfo()),
@@ -1209,7 +1209,7 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg);
return ResultReg;
}
@@ -1220,13 +1220,13 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addReg(Op0, Op0IsKill * RegState::Kill);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
@@ -1240,15 +1240,15 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1262,17 +1262,17 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addReg(Op2, Op2IsKill * RegState::Kill);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addReg(Op2, Op2IsKill * RegState::Kill);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1285,15 +1285,15 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addImm(Imm);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addImm(Imm);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1306,17 +1306,17 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode,
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addImm(Imm1)
.addImm(Imm2);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addImm(Imm1)
.addImm(Imm2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1329,15 +1329,15 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addFPImm(FPImm);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addFPImm(FPImm);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1351,17 +1351,17 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addImm(Imm);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addImm(Imm);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1375,17 +1375,17 @@ unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode,
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addImm(Imm1).addImm(Imm2);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addImm(Imm1).addImm(Imm2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1397,11 +1397,11 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg).addImm(Imm);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1413,12 +1413,12 @@ unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addImm(Imm1).addImm(Imm2);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1).addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1432,7 +1432,7 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
const TargetRegisterClass *RC = MRI.getRegClass(Op0);
MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DL, TII.get(TargetOpcode::COPY), ResultReg)
+ DbgLoc, TII.get(TargetOpcode::COPY), ResultReg)
.addReg(Op0, getKillRegState(Op0IsKill), Idx);
return ResultReg;
}
@@ -1498,9 +1498,9 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// Set the DebugLoc for the copy. Prefer the location of the operand
// if there is one; use the location of the PHI otherwise.
- DL = PN->getDebugLoc();
+ DbgLoc = PN->getDebugLoc();
if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp))
- DL = Inst->getDebugLoc();
+ DbgLoc = Inst->getDebugLoc();
unsigned Reg = getRegForValue(PHIOp);
if (Reg == 0) {
@@ -1508,7 +1508,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
return false;
}
FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
- DL = DebugLoc();
+ DbgLoc = DebugLoc();
}
}
@@ -1523,7 +1523,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
// this by scanning the single-use users of the load until we get to FoldInst.
unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
- const Instruction *TheUser = LI->use_back();
+ const Instruction *TheUser = LI->user_back();
while (TheUser != FoldInst && // Scan up until we find FoldInst.
// Stay in the right block.
TheUser->getParent() == FoldInst->getParent() &&
@@ -1532,7 +1532,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
if (!TheUser->hasOneUse())
return false;
- TheUser = TheUser->use_back();
+ TheUser = TheUser->user_back();
}
// If we didn't find the fold instruction, then we failed to collapse the
@@ -1559,7 +1559,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
return false;
MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg);
- MachineInstr *User = &*RI;
+ MachineInstr *User = RI->getParent();
// Set the insertion point properly. Folding the load can cause generation of
// other random instructions (like sign extends) for addressing modes; make
@@ -1576,8 +1576,8 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) {
if (!isa<AddOperator>(Add))
return false;
// Type size needs to match.
- if (TD.getTypeSizeInBits(GEP->getType()) !=
- TD.getTypeSizeInBits(Add->getType()))
+ if (DL.getTypeSizeInBits(GEP->getType()) !=
+ DL.getTypeSizeInBits(Add->getType()))
return false;
// Must be in the same basic block.
if (isa<Instruction>(Add) &&
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 4309dc1d48..5f0006e237 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -21,8 +21,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DebugInfo.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
@@ -32,6 +32,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
@@ -46,16 +47,15 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
if (I->use_empty()) return false;
if (isa<PHINode>(I)) return true;
const BasicBlock *BB = I->getParent();
- for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI) {
- const User *U = *UI;
+ for (const User *U : I->users())
if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U))
return true;
- }
+
return false;
}
-void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
+void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
+ SelectionDAG *DAG) {
const TargetLowering *TLI = TM.getTargetLowering();
Fn = &fn;
@@ -74,7 +74,12 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
// them.
Function::const_iterator BB = Fn->begin(), EB = Fn->end();
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ // Don't fold inalloca allocas or other dynamic allocas into the initial
+ // stack frame allocation, even if they are in the entry block.
+ if (!AI->isStaticAlloca())
+ continue;
+
if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
Type *Ty = AI->getAllocatedType();
uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty);
@@ -85,21 +90,51 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
TySize *= CUI->getZExtValue(); // Get total allocated size.
if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
- // The object may need to be placed onto the stack near the stack
- // protector if one exists. Determine here if this object is a suitable
- // candidate. I.e., it would trigger the creation of a stack protector.
- bool MayNeedSP =
- (AI->isArrayAllocation() ||
- (TySize >= 8 && isa<ArrayType>(Ty) &&
- cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
StaticAllocaMap[AI] =
- MF->getFrameInfo()->CreateStackObject(TySize, Align, false,
- MayNeedSP, AI);
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
}
+ }
for (; BB != EB; ++BB)
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
I != E; ++I) {
+ // Look for dynamic allocas.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ if (!AI->isStaticAlloca()) {
+ unsigned Align = std::max(
+ (unsigned)TLI->getDataLayout()->getPrefTypeAlignment(
+ AI->getAllocatedType()),
+ AI->getAlignment());
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ if (Align <= StackAlign)
+ Align = 0;
+ // Inform the Frame Information that we have variable-sized objects.
+ MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, AI);
+ }
+ }
+
+ // Look for inline asm that clobbers the SP register.
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ ImmutableCallSite CS(I);
+ if (isa<InlineAsm>(CS.getCalledValue())) {
+ unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ std::vector<TargetLowering::AsmOperandInfo> Ops =
+ TLI->ParseConstraints(CS);
+ for (size_t I = 0, E = Ops.size(); I != E; ++I) {
+ TargetLowering::AsmOperandInfo &Op = Ops[I];
+ if (Op.Type == InlineAsm::isClobber) {
+ // Clobbers don't have SDValue operands, hence SDValue().
+ TLI->ComputeConstraintToUse(Op, SDValue(), DAG);
+ std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+ TLI->getRegForInlineAsmConstraint(Op.ConstraintCode,
+ Op.ConstraintVT);
+ if (PhysReg.first == SP)
+ MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true);
+ }
+ }
+ }
+ }
+
// Mark values used outside their block as exported, by allocating
// a virtual register for them.
if (isUsedOutsideOfDefiningBlock(I))
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 3a8fb85911..1c596b8c42 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -220,10 +220,19 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
unsigned VRBase = 0;
const TargetRegisterClass *RC =
TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF));
- // If the register class is unknown for the given definition, then try to
- // infer one from the value type.
- if (!RC && i < NumResults)
- RC = TLI->getRegClassFor(Node->getSimpleValueType(i));
+ // Always let the value type influence the used register class. The
+ // constraints on the instruction may be too lax to represent the value
+ // type correctly. For example, a 64-bit float (X86::FR64) can't live in
+ // the 32-bit float super-class (X86::FR32).
+ if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) {
+ const TargetRegisterClass *VTRC =
+ TLI->getRegClassFor(Node->getSimpleValueType(i));
+ if (RC)
+ VTRC = TRI->getCommonSubClass(RC, VTRC);
+ if (VTRC)
+ RC = VTRC;
+ }
+
if (II.OpInfo[i].isOptionalDef()) {
// Optional def must be a physical register.
unsigned NumResults = CountResults(Node);
@@ -731,10 +740,16 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
unsigned NumDefs = II.getNumDefs();
const uint16_t *ScratchRegs = NULL;
- // Handle PATCHPOINT specially and then use the generic code.
- if (Opc == TargetOpcode::PATCHPOINT) {
- unsigned CC = Node->getConstantOperandVal(PatchPointOpers::CCPos);
- NumDefs = NumResults;
+ // Handle STACKMAP and PATCHPOINT specially and then use the generic code.
+ if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
+ // Stackmaps do not have arguments and do not preserve their calling
+ // convention. However, to simplify runtime support, they clobber the same
+ // scratch registers as AnyRegCC.
+ unsigned CC = CallingConv::AnyReg;
+ if (Opc == TargetOpcode::PATCHPOINT) {
+ CC = Node->getConstantOperandVal(PatchPointOpers::CCPos);
+ NumDefs = NumResults;
+ }
ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 9061ae9f76..20afb3d594 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -13,15 +13,16 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/DebugInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -152,10 +153,10 @@ private:
public:
// DAGUpdateListener implementation.
- virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ void NodeDeleted(SDNode *N, SDNode *E) override {
ForgetNode(N);
}
- virtual void NodeUpdated(SDNode *N) {}
+ void NodeUpdated(SDNode *N) override {}
// Node replacement helpers
void ReplacedNode(SDNode *N) {
@@ -729,10 +730,11 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
MVT VT = Value.getSimpleValueType();
switch (TLI.getOperationAction(ISD::STORE, VT)) {
default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal:
+ case TargetLowering::Legal: {
// If this is an unaligned store and the target doesn't support it,
// expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ unsigned AS = ST->getAddressSpace();
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
@@ -740,6 +742,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
DAG, TLI, this);
}
break;
+ }
case TargetLowering::Custom: {
SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
if (Res.getNode())
@@ -807,7 +810,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
DAG.getConstant(RoundWidth,
- TLI.getShiftAmountTy(Value.getValueType())));
+ TLI.getShiftAmountTy(Value.getValueType())));
Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
@@ -818,7 +821,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// Store the top RoundWidth bits.
Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
DAG.getConstant(ExtraWidth,
- TLI.getShiftAmountTy(Value.getValueType())));
+ TLI.getShiftAmountTy(Value.getValueType())));
Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
RoundVT, isVolatile, isNonTemporal, Alignment,
TBAAInfo);
@@ -826,7 +829,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
@@ -840,16 +843,18 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(),
StVT.getSimpleVT())) {
default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal:
+ case TargetLowering::Legal: {
+ unsigned AS = ST->getAddressSpace();
// If this is an unaligned store and the target doesn't support it,
// expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
}
break;
+ }
case TargetLowering::Custom: {
SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
if (Res.getNode())
@@ -889,10 +894,11 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal:
+ case TargetLowering::Legal: {
+ unsigned AS = LD->getAddressSpace();
// If this is an unaligned load and the target doesn't support it,
// expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT(), AS)) {
Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment =
TLI.getDataLayout()->getABITypeAlignment(Ty);
@@ -901,6 +907,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
}
}
break;
+ }
case TargetLowering::Custom: {
SDValue Res = TLI.LowerOperation(RVal, DAG);
if (Res.getNode()) {
@@ -1017,7 +1024,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Move the top bits to the right place.
Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
DAG.getConstant(RoundWidth,
- TLI.getShiftAmountTy(Hi.getValueType())));
+ TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
@@ -1047,7 +1054,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Move the top bits to the right place.
Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
DAG.getConstant(ExtraWidth,
- TLI.getShiftAmountTy(Hi.getValueType())));
+ TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
@@ -1059,77 +1066,82 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
- isCustom = true;
- // FALLTHROUGH
+ isCustom = true;
+ // FALLTHROUGH
case TargetLowering::Legal: {
- Value = SDValue(Node, 0);
- Chain = SDValue(Node, 1);
-
- if (isCustom) {
- SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Res.getNode()) {
- Value = Res;
- Chain = Res.getValue(1);
- }
- } else {
- // If this is an unaligned load and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- Type *Ty =
- LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment =
- TLI.getDataLayout()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node),
- DAG, TLI, Value, Chain);
- }
- }
- }
- break;
+ Value = SDValue(Node, 0);
+ Chain = SDValue(Node, 1);
+
+ if (isCustom) {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode()) {
+ Value = Res;
+ Chain = Res.getValue(1);
+ }
+ } else {
+ // If this is an unaligned load and the target doesn't support
+ // it, expand it.
+ EVT MemVT = LD->getMemoryVT();
+ unsigned AS = LD->getAddressSpace();
+ if (!TLI.allowsUnalignedMemoryAccesses(MemVT, AS)) {
+ Type *Ty =
+ LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getDataLayout()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, Value, Chain);
+ }
+ }
+ }
+ break;
}
case TargetLowering::Expand:
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
- SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr,
- LD->getMemOperand());
- unsigned ExtendOp;
- switch (ExtType) {
- case ISD::EXTLOAD:
- ExtendOp = (SrcVT.isFloatingPoint() ?
- ISD::FP_EXTEND : ISD::ANY_EXTEND);
- break;
- case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
- case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
- default: llvm_unreachable("Unexpected extend load type!");
- }
- Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
- Chain = Load.getValue(1);
- break;
- }
-
- assert(!SrcVT.isVector() &&
- "Vector Loads are handled in LegalizeVectorOps");
-
- // FIXME: This does not work for vectors on most targets. Sign- and
- // zero-extend operations are currently folded into extending loads,
- // whether they are legal or not, and then we end up here without any
- // support for legalizing them.
- assert(ExtType != ISD::EXTLOAD &&
- "EXTLOAD should always be supported!");
- // Turn the unsupported load into an EXTLOAD followed by an explicit
- // zero/sign extend inreg.
- SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
- Chain, Ptr, SrcVT,
- LD->getMemOperand());
- SDValue ValRes;
- if (ExtType == ISD::SEXTLOAD)
- ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
- Result.getValueType(),
- Result, DAG.getValueType(SrcVT));
- else
- ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
- Value = ValRes;
- Chain = Result.getValue(1);
- break;
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) &&
+ TLI.isTypeLegal(SrcVT)) {
+ SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr,
+ LD->getMemOperand());
+ unsigned ExtendOp;
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ ExtendOp = (SrcVT.isFloatingPoint() ?
+ ISD::FP_EXTEND : ISD::ANY_EXTEND);
+ break;
+ case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+ default: llvm_unreachable("Unexpected extend load type!");
+ }
+ Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Chain = Load.getValue(1);
+ break;
+ }
+
+ assert(!SrcVT.isVector() &&
+ "Vector Loads are handled in LegalizeVectorOps");
+
+ // FIXME: This does not work for vectors on most targets. Sign-
+ // and zero-extend operations are currently folded into extending
+ // loads, whether they are legal or not, and then we end up here
+ // without any support for legalizing them.
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an
+ // explicit zero/sign extend inreg.
+ SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl,
+ Node->getValueType(0),
+ Chain, Ptr, SrcVT,
+ LD->getMemOperand());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl,
+ SrcVT.getScalarType());
+ Value = ValRes;
+ Chain = Result.getValue(1);
+ break;
}
}
@@ -1383,10 +1395,39 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
SDValue Vec = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
SDLoc dl(Op);
- // Store the value to a temporary stack slot, then LOAD the returned part.
- SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
- MachinePointerInfo(), false, false, 0);
+
+ // Before we generate a new store to a temporary stack slot, see if there is
+ // already one that we can use. There often is because when we scalarize
+ // vector operations (using SelectionDAG::UnrollVectorOp for example) a whole
+ // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in
+ // the vector. If all are expanded here, we don't want one store per vector
+ // element.
+ SDValue StackPtr, Ch;
+ for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
+ UE = Vec.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) {
+ if (ST->isIndexed() || ST->isTruncatingStore() ||
+ ST->getValue() != Vec)
+ continue;
+
+ // Make sure that nothing else could have stored into the destination of
+ // this store.
+ if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode()))
+ continue;
+
+ StackPtr = ST->getBasePtr();
+ Ch = SDValue(ST, 0);
+ break;
+ }
+ }
+
+ if (!Ch.getNode()) {
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+ StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+ }
// Add the offset to the index.
unsigned EltSize =
@@ -1530,9 +1571,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
// the pointer so that the loaded integer will contain the sign bit.
unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
- LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
- LoadPtr,
- DAG.getConstant(ByteOffset, LoadPtr.getValueType()));
+ LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr,
+ DAG.getConstant(ByteOffset, LoadPtr.getValueType()));
// Load a legal integer containing the sign bit.
SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
false, false, false, 0);
@@ -1555,8 +1595,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
// Select between the nabs and abs value based on the sign bit of
// the input.
return DAG.getSelect(dl, AbsVal.getValueType(), SignBit,
- DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
- AbsVal);
+ DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
+ AbsVal);
}
void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
@@ -1776,6 +1816,98 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
false, false, false, 0);
}
+static bool
+ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
+ const TargetLowering &TLI, SDValue &Res) {
+ unsigned NumElems = Node->getNumOperands();
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+
+ // Try to group the scalars into pairs, shuffle the pairs together, then
+ // shuffle the pairs of pairs together, etc. until the vector has
+ // been built. This will work only if all of the necessary shuffle masks
+ // are legal.
+
+ // We do this in two phases; first to check the legality of the shuffles,
+ // and next, assuming that all shuffles are legal, to create the new nodes.
+ for (int Phase = 0; Phase < 2; ++Phase) {
+ SmallVector<std::pair<SDValue, SmallVector<int, 16> >, 16> IntermedVals,
+ NewIntermedVals;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+
+ SDValue Vec;
+ if (Phase)
+ Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, V);
+ IntermedVals.push_back(std::make_pair(Vec, SmallVector<int, 16>(1, i)));
+ }
+
+ while (IntermedVals.size() > 2) {
+ NewIntermedVals.clear();
+ for (unsigned i = 0, e = (IntermedVals.size() & ~1u); i < e; i += 2) {
+ // This vector and the next vector are shuffled together (simply to
+ // append the one to the other).
+ SmallVector<int, 16> ShuffleVec(NumElems, -1);
+
+ SmallVector<int, 16> FinalIndices;
+ FinalIndices.reserve(IntermedVals[i].second.size() +
+ IntermedVals[i+1].second.size());
+
+ int k = 0;
+ for (unsigned j = 0, f = IntermedVals[i].second.size(); j != f;
+ ++j, ++k) {
+ ShuffleVec[k] = j;
+ FinalIndices.push_back(IntermedVals[i].second[j]);
+ }
+ for (unsigned j = 0, f = IntermedVals[i+1].second.size(); j != f;
+ ++j, ++k) {
+ ShuffleVec[k] = NumElems + j;
+ FinalIndices.push_back(IntermedVals[i+1].second[j]);
+ }
+
+ SDValue Shuffle;
+ if (Phase)
+ Shuffle = DAG.getVectorShuffle(VT, dl, IntermedVals[i].first,
+ IntermedVals[i+1].first,
+ ShuffleVec.data());
+ else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT))
+ return false;
+ NewIntermedVals.push_back(std::make_pair(Shuffle, FinalIndices));
+ }
+
+ // If we had an odd number of defined values, then append the last
+ // element to the array of new vectors.
+ if ((IntermedVals.size() & 1) != 0)
+ NewIntermedVals.push_back(IntermedVals.back());
+
+ IntermedVals.swap(NewIntermedVals);
+ }
+
+ assert(IntermedVals.size() <= 2 && IntermedVals.size() > 0 &&
+ "Invalid number of intermediate vectors");
+ SDValue Vec1 = IntermedVals[0].first;
+ SDValue Vec2;
+ if (IntermedVals.size() > 1)
+ Vec2 = IntermedVals[1].first;
+ else if (Phase)
+ Vec2 = DAG.getUNDEF(VT);
+
+ SmallVector<int, 16> ShuffleVec(NumElems, -1);
+ for (unsigned i = 0, e = IntermedVals[0].second.size(); i != e; ++i)
+ ShuffleVec[IntermedVals[0].second[i]] = i;
+ for (unsigned i = 0, e = IntermedVals[1].second.size(); i != e; ++i)
+ ShuffleVec[IntermedVals[1].second[i]] = NumElems + i;
+
+ if (Phase)
+ Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
+ else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT))
+ return false;
+ }
+
+ return true;
+}
/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
/// support the operation, but do support the resultant vector type.
@@ -1850,25 +1982,38 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
false, false, false, Alignment);
}
- if (!MoreThanTwoValues) {
- SmallVector<int, 8> ShuffleVec(NumElems, -1);
- for (unsigned i = 0; i < NumElems; ++i) {
- SDValue V = Node->getOperand(i);
- if (V.getOpcode() == ISD::UNDEF)
- continue;
- ShuffleVec[i] = V == Value1 ? 0 : NumElems;
- }
- if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) {
- // Get the splatted value into the low element of a vector register.
- SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1);
- SDValue Vec2;
- if (Value2.getNode())
- Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2);
- else
- Vec2 = DAG.getUNDEF(VT);
+ SmallSet<SDValue, 16> DefinedValues;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ if (Node->getOperand(i).getOpcode() == ISD::UNDEF)
+ continue;
+ DefinedValues.insert(Node->getOperand(i));
+ }
- // Return shuffle(LowValVec, undef, <0,0,0,0>)
- return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
+ if (TLI.shouldExpandBuildVectorWithShuffles(VT, DefinedValues.size())) {
+ if (!MoreThanTwoValues) {
+ SmallVector<int, 8> ShuffleVec(NumElems, -1);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ ShuffleVec[i] = V == Value1 ? 0 : NumElems;
+ }
+ if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) {
+ // Get the splatted value into the low element of a vector register.
+ SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1);
+ SDValue Vec2;
+ if (Value2.getNode())
+ Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2);
+ else
+ Vec2 = DAG.getUNDEF(VT);
+
+ // Return shuffle(LowValVec, undef, <0,0,0,0>)
+ return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
+ }
+ } else {
+ SDValue Res;
+ if (ExpandBVWithShuffles(Node, DAG, TLI, Res))
+ return Res;
}
}
@@ -2868,6 +3013,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Node->getOperand(1), Zero, Zero,
cast<AtomicSDNode>(Node)->getMemOperand(),
cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
cast<AtomicSDNode>(Node)->getSynchScope());
Results.push_back(Swap.getValue(0));
Results.push_back(Swap.getValue(1));
@@ -3099,7 +3245,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
// BUILD_VECTOR operands are allowed to be wider than the element type.
- // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept it
+ // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept
+ // it.
if (NewEltVT.bitsLT(EltVT)) {
// Convert shuffle node.
@@ -3107,8 +3254,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// cast operands to v8i32 and re-build the mask.
// Calculate new VT, the size of the new VT should be equal to original.
- EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT,
- VT.getSizeInBits()/NewEltVT.getSizeInBits());
+ EVT NewVT =
+ EVT::getVectorVT(*DAG.getContext(), NewEltVT,
+ VT.getSizeInBits() / NewEltVT.getSizeInBits());
assert(NewVT.bitsEq(VT));
// cast operands to new VT
@@ -3116,7 +3264,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1);
// Convert the shuffle mask
- unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements();
+ unsigned int factor =
+ NewVT.getVectorNumElements()/VT.getVectorNumElements();
// EltVT gets smaller
assert(factor > 0);
@@ -3782,8 +3931,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
} else {
Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
CC = DAG.getCondCode(ISD::SETNE);
- Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
- Tmp3, Tmp4, CC);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1,
+ Tmp2, Tmp3, Tmp4, CC);
}
}
Results.push_back(Tmp1);
@@ -3813,8 +3962,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
} else {
Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
Tmp4 = DAG.getCondCode(ISD::SETNE);
- Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
- Tmp3, Node->getOperand(4));
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4,
+ Tmp2, Tmp3, Node->getOperand(4));
}
Results.push_back(Tmp1);
break;
@@ -3976,7 +4125,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
}
case ISD::SELECT: {
unsigned ExtOp, TruncOp;
- if (Node->getValueType(0).isVector()) {
+ if (Node->getValueType(0).isVector() ||
+ Node->getValueType(0).getSizeInBits() == NVT.getSizeInBits()) {
ExtOp = ISD::BITCAST;
TruncOp = ISD::BITCAST;
} else if (Node->getValueType(0).isInteger()) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 4255948ea1..18b2376b8b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -193,10 +193,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
SDValue Op2 = GetPromotedInteger(N->getOperand(2));
SDValue Op3 = GetPromotedInteger(N->getOperand(3));
- SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
- N->getMemoryVT(), N->getChain(), N->getBasePtr(),
- Op2, Op3, N->getMemOperand(), N->getOrdering(),
- N->getSynchScope());
+ SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(), Op2, Op3,
+ N->getMemOperand(), N->getSuccessOrdering(),
+ N->getFailureOrdering(), N->getSynchScope());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -2448,6 +2448,7 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
N->getOperand(1), Zero, Zero,
cast<AtomicSDNode>(N)->getMemOperand(),
cast<AtomicSDNode>(N)->getOrdering(),
+ cast<AtomicSDNode>(N)->getOrdering(),
cast<AtomicSDNode>(N)->getSynchScope());
ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
ReplaceValueWith(SDValue(N, 1), Swap.getValue(1));
@@ -2577,13 +2578,17 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
// this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, NULL);
SDValue Tmp1, Tmp2;
- Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()),
- LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
+ if (TLI.isTypeLegal(LHSLo.getValueType()) &&
+ TLI.isTypeLegal(RHSLo.getValueType()))
+ Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
if (!Tmp1.getNode())
Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()),
LHSLo, RHSLo, LowCC);
- Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()),
- LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
+ if (TLI.isTypeLegal(LHSHi.getValueType()) &&
+ TLI.isTypeLegal(RHSHi.getValueType()))
+ Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
if (!Tmp2.getNode())
Tmp2 = DAG.getNode(ISD::SETCC, dl,
getSetCCResultType(LHSHi.getValueType()),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index eb132304ef..e141883744 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -634,7 +634,7 @@ namespace {
: SelectionDAG::DAGUpdateListener(dtl.getDAG()),
DTL(dtl), NodesToAnalyze(nta) {}
- virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ void NodeDeleted(SDNode *N, SDNode *E) override {
assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
N->getNodeId() != DAGTypeLegalizer::Processed &&
"Invalid node ID for RAUW deletion!");
@@ -655,7 +655,7 @@ namespace {
NodesToAnalyze.insert(E);
}
- virtual void NodeUpdated(SDNode *N) {
+ void NodeUpdated(SDNode *N) override {
// Node updates can mean pretty much anything. It is possible that an
// operand was set to something already processed (f.e.) in which case
// this node could become ready. Recompute its flags.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 13bb08f08c..947ea10fd7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -541,6 +541,7 @@ private:
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
//===--------------------------------------------------------------------===//
// Vector Splitting Support: LegalizeVectorTypes.cpp
@@ -670,13 +671,13 @@ private:
LoadSDNode *LD, ISD::LoadExtType ExtType);
/// Helper genWidenVectorStores - Helper function to generate a set of
- /// stores to store a widen vector into non widen memory
+ /// stores to store a widen vector into non-widen memory
/// StChain: list of chains for the stores we have generated
/// ST: store of a widen value
void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
/// Helper genWidenVectorTruncStores - Helper function to generate a set of
- /// stores to store a truncate widen vector into non widen memory
+ /// stores to store a truncate widen vector into non-widen memory
/// StChain: list of chains for the stores we have generated
/// ST: store of a widen value
void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index c749fdea9f..e9424f2cde 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -78,8 +78,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
InOp = GetWidenedVector(InOp);
EVT LoVT, HiVT;
- llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT);
- llvm::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT);
+ std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
@@ -518,7 +518,7 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Cond, CL, CH);
else
- llvm::tie(CL, CH) = DAG.SplitVector(Cond, dl);
+ std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
}
Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
@@ -540,7 +540,7 @@ void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
EVT LoVT, HiVT;
- llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
Lo = DAG.getUNDEF(LoVT);
Hi = DAG.getUNDEF(HiVT);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 2c3cdccb56..551d0549c8 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -77,6 +77,10 @@ class VectorLegalizer {
// Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input
// operand to the next size up.
SDValue PromoteVectorOpINT_TO_FP(SDValue Op);
+ // Implements FP_TO_[SU]INT vector promotion of the result type; it is
+ // promoted to the next size up integer type. The result is then truncated
+ // back to the original type.
+ SDValue PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned);
public:
bool Run();
@@ -88,7 +92,7 @@ bool VectorLegalizer::Run() {
// Before we start legalizing vector nodes, check if there are any vectors.
bool HasVectors = false;
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) {
+ E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
// Check if the values of the nodes contain vectors. We don't need to check
// the operands because we are going to check their values at some point.
for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
@@ -112,7 +116,7 @@ bool VectorLegalizer::Run() {
// node is only legalized after all of its operands are legalized.
DAG.AssignTopologicalOrder();
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
+ E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
LegalizeOp(SDValue(I, 0));
// Finally, it's possible the root changed. Get the new root.
@@ -210,6 +214,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
+ case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTTZ:
case ISD::CTLZ_ZERO_UNDEF:
@@ -273,6 +278,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Result = PromoteVectorOpINT_TO_FP(Op);
Changed = true;
break;
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ // Promote the operation by extending the operand.
+ Result = PromoteVectorOpFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
+ Changed = true;
+ break;
}
break;
case TargetLowering::Legal: break;
@@ -351,14 +362,9 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
//
// Increase the bitwidth of the element to the next pow-of-two
// (which is greater than 8 bits).
- unsigned NumElts = VT.getVectorNumElements();
- EVT EltVT = VT.getVectorElementType();
- EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits());
- assert(EltVT.isSimple() && "Promoting to a non-simple vector type!");
-
- // Build a new vector type and check if it is legal.
- MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
+ EVT NVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+ assert(NVT.isSimple() && "Promoting to a non-simple vector type!");
SDLoc dl(Op);
SmallVector<SDValue, 4> Operands(Op.getNumOperands());
@@ -375,6 +381,35 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
Operands.size());
}
+// For FP_TO_INT we promote the result type to a vector type with wider
+// elements and then truncate the result. This is different from the default
+// PromoteVector which uses bitcast to promote thus assumning that the
+// promoted vector type has the same overall size.
+SDValue VectorLegalizer::PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned) {
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+ EVT VT = Op.getValueType();
+
+ EVT NewVT;
+ unsigned NewOpc;
+ while (1) {
+ NewVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+ assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
+ NewOpc = ISD::FP_TO_SINT;
+ break;
+ }
+ if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewVT)) {
+ NewOpc = ISD::FP_TO_UINT;
+ break;
+ }
+ }
+
+ SDLoc loc(Op);
+ SDValue promoted = DAG.getNode(NewOpc, SDLoc(Op), NewVT, Op.getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted);
+}
+
SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
SDLoc dl(Op);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f7a3e3d250..940a9c9059 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -65,6 +65,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
case ISD::ANY_EXTEND:
+ case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTPOP:
case ISD::CTTZ:
@@ -384,6 +385,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE:
Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
+ case ISD::FP_ROUND:
+ Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
+ break;
}
}
@@ -467,6 +471,15 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
N->getOriginalAlignment(), N->getTBAAInfo());
}
+/// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N),
+ N->getValueType(0).getVectorElementType(), Elt,
+ N->getOperand(1));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+}
//===----------------------------------------------------------------------===//
// Result Vector Splitting
@@ -521,6 +534,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
break;
+ case ISD::BSWAP:
case ISD::CONVERT_RNDSAT:
case ISD::CTLZ:
case ISD::CTTZ:
@@ -624,7 +638,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
// We know the result is a vector. The input may be either a vector or a
// scalar value.
EVT LoVT, HiVT;
- llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
SDLoc dl(N);
SDValue InOp = N->getOperand(0);
@@ -679,7 +693,7 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc dl(N);
- llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
unsigned LoNumElts = LoVT.getVectorNumElements();
SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size());
@@ -700,7 +714,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
}
EVT LoVT, HiVT;
- llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size());
@@ -716,7 +730,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
SDLoc dl(N);
EVT LoVT, HiVT;
- llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
@@ -740,7 +754,7 @@ void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
SDLoc dl(N);
EVT LoVT, HiVT;
- llvm::tie(LoVT, HiVT) =
+ std::tie(LoVT, HiVT) =
DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT());
Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
@@ -804,7 +818,7 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc dl(N);
- llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
Hi = DAG.getUNDEF(HiVT);
}
@@ -814,7 +828,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
EVT LoVT, HiVT;
SDLoc dl(LD);
- llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
ISD::LoadExtType ExtType = LD->getExtensionType();
SDValue Ch = LD->getChain();
@@ -828,7 +842,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
const MDNode *TBAAInfo = LD->getTBAAInfo();
EVT LoMemVT, HiMemVT;
- llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
@@ -859,12 +873,12 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc DL(N);
- llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
// Split the input.
SDValue LL, LH, RL, RH;
- llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
- llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
@@ -875,7 +889,7 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
// Get the dest types - they may not match the input types, e.g. int_to_fp.
EVT LoVT, HiVT;
SDLoc dl(N);
- llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
// If the input also splits, handle it directly for a compile time speedup.
// Otherwise split it by hand.
@@ -883,7 +897,7 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
GetSplitVector(N->getOperand(0), Lo, Hi);
else
- llvm::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
if (N->getOpcode() == ISD::FP_ROUND) {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
@@ -912,7 +926,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
EVT SrcVT = N->getOperand(0).getValueType();
EVT DestVT = N->getValueType(0);
EVT LoVT, HiVT;
- llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT);
// We can do better than a generic split operation if the extend is doing
// more than just doubling the width of the elements and the following are
@@ -938,7 +952,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
EVT SplitSrcVT =
EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2);
EVT SplitLoVT, SplitHiVT;
- llvm::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
+ std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
DEBUG(dbgs() << "Split vector extend via incremental extend:";
@@ -947,7 +961,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
SDValue NewSrc =
DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
// Get the low and high halves of the new, extended one step, vector.
- llvm::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
+ std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
// Extend those vector halves the rest of the way.
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
@@ -1160,13 +1174,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
"Lo and Hi have differing types");
EVT LoOpVT, HiOpVT;
- llvm::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT);
+ std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT);
assert(LoOpVT == HiOpVT && "Asymmetric vector split?");
SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask;
- llvm::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL);
- llvm::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL);
- llvm::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
+ std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL);
+ std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL);
+ std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
SDValue LoSelect =
DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
@@ -1281,7 +1295,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
GetSplitVector(N->getOperand(1), Lo, Hi);
EVT LoMemVT, HiMemVT;
- llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
@@ -1370,7 +1384,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
// Extract the halves of the input via extract_subvector.
SDValue InLoVec, InHiVec;
- llvm::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL);
+ std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL);
// Truncate them to 1/2 the element size.
EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
@@ -2180,6 +2194,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
Cond1 = GetWidenedVector(Cond1);
+ // If we have to split the condition there is no point in widening the
+ // select. This would result in an cycle of widening the select ->
+ // widening the condition operand -> splitting the condition operand ->
+ // splitting the select -> widening the select. Instead split this select
+ // further and widen the resulting type.
+ if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) {
+ SDValue SplitSelect = SplitVecOp_VSELECT(N, 0);
+ SDValue Res = ModifyToType(SplitSelect, WidenVT);
+ return Res;
+ }
+
if (Cond1.getValueType() != CondWidenVT)
Cond1 = ModifyToType(Cond1, CondWidenVT);
}
@@ -2251,7 +2276,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
SDValue InOp1 = N->getOperand(0);
EVT InVT = InOp1.getValueType();
- assert(InVT.isVector() && "can not widen non vector type");
+ assert(InVT.isVector() && "can not widen non-vector type");
EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
InVT.getVectorElementType(), WidenNumElts);
InOp1 = GetWidenedVector(InOp1);
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 1dd2128b8b..3b3424dfe0 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -603,7 +603,7 @@ SUnit *ResourcePriorityQueue::pop() {
std::vector<SUnit *>::iterator Best = Queue.begin();
if (!DisableDFASched) {
signed BestCost = SUSchedulingCost(*Best);
- for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+ for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
E = Queue.end(); I != E; ++I) {
if (SUSchedulingCost(*I) > BestCost) {
@@ -614,14 +614,14 @@ SUnit *ResourcePriorityQueue::pop() {
}
// Use default TD scheduling mechanism.
else {
- for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+ for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
E = Queue.end(); I != E; ++I)
if (Picker(*Best, *I))
Best = I;
}
SUnit *V = *Best;
- if (Best != prior(Queue.end()))
+ if (Best != std::prev(Queue.end()))
std::swap(*Best, Queue.back());
Queue.pop_back();
@@ -633,7 +633,7 @@ SUnit *ResourcePriorityQueue::pop() {
void ResourcePriorityQueue::remove(SUnit *SU) {
assert(!Queue.empty() && "Queue is empty!");
std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
- if (I != prior(Queue.end()))
+ if (I != std::prev(Queue.end()))
std::swap(*I, Queue.back());
Queue.pop_back();
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 4af7172847..b62bd623c4 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -15,8 +15,8 @@
#define LLVM_CODEGEN_SDNODEDBGVALUE_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/DebugLoc.h"
namespace llvm {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 6c5e0ab8b2..0687392a34 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -80,7 +80,7 @@ public:
ScheduleDAGFast(MachineFunction &mf)
: ScheduleDAGSDNodes(mf) {}
- void Schedule();
+ void Schedule() override;
/// AddPred - adds a predecessor edge to SUnit SU.
/// This returns true if this is a new predecessor.
@@ -107,7 +107,7 @@ private:
void ListScheduleBottomUp();
/// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
- bool forceUnitLatencies() const { return true; }
+ bool forceUnitLatencies() const override { return true; }
};
} // end anonymous namespace
@@ -646,9 +646,10 @@ class ScheduleDAGLinearize : public ScheduleDAGSDNodes {
public:
ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {}
- void Schedule();
+ void Schedule() override;
- MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+ MachineBasicBlock *
+ EmitSchedule(MachineBasicBlock::iterator &InsertPos) override;
private:
std::vector<SDNode*> Sequence;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 1a562d74b4..c28366456a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -177,7 +177,7 @@ public:
delete AvailableQueue;
}
- void Schedule();
+ void Schedule() override;
ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
@@ -261,7 +261,7 @@ private:
/// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
/// need actual latency information but the hybrid scheduler does.
- bool forceUnitLatencies() const {
+ bool forceUnitLatencies() const override {
return !NeedLatency;
}
};
@@ -1539,7 +1539,6 @@ template<class SF>
struct reverse_sort : public queue_sort {
SF &SortFunc;
reverse_sort(SF &sf) : SortFunc(sf) {}
- reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
bool operator()(SUnit* left, SUnit* right) const {
// reverse left/right rather than simply !SortFunc(left, right)
@@ -1559,7 +1558,6 @@ struct bu_ls_rr_sort : public queue_sort {
RegReductionPQBase *SPQ;
bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
- bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
bool operator()(SUnit* left, SUnit* right) const;
};
@@ -1574,8 +1572,6 @@ struct src_ls_rr_sort : public queue_sort {
RegReductionPQBase *SPQ;
src_ls_rr_sort(RegReductionPQBase *spq)
: SPQ(spq) {}
- src_ls_rr_sort(const src_ls_rr_sort &RHS)
- : SPQ(RHS.SPQ) {}
bool operator()(SUnit* left, SUnit* right) const;
};
@@ -1590,8 +1586,6 @@ struct hybrid_ls_rr_sort : public queue_sort {
RegReductionPQBase *SPQ;
hybrid_ls_rr_sort(RegReductionPQBase *spq)
: SPQ(spq) {}
- hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
- : SPQ(RHS.SPQ) {}
bool isReady(SUnit *SU, unsigned CurCycle) const;
@@ -1609,8 +1603,6 @@ struct ilp_ls_rr_sort : public queue_sort {
RegReductionPQBase *SPQ;
ilp_ls_rr_sort(RegReductionPQBase *spq)
: SPQ(spq) {}
- ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
- : SPQ(RHS.SPQ) {}
bool isReady(SUnit *SU, unsigned CurCycle) const;
@@ -1675,13 +1667,13 @@ public:
return scheduleDAG->getHazardRec();
}
- void initNodes(std::vector<SUnit> &sunits);
+ void initNodes(std::vector<SUnit> &sunits) override;
- void addNode(const SUnit *SU);
+ void addNode(const SUnit *SU) override;
- void updateNode(const SUnit *SU);
+ void updateNode(const SUnit *SU) override;
- void releaseState() {
+ void releaseState() override {
SUnits = 0;
SethiUllmanNumbers.clear();
std::fill(RegPressure.begin(), RegPressure.end(), 0);
@@ -1695,26 +1687,26 @@ public:
return SU->getNode()->getIROrder();
}
- bool empty() const { return Queue.empty(); }
+ bool empty() const override { return Queue.empty(); }
- void push(SUnit *U) {
+ void push(SUnit *U) override {
assert(!U->NodeQueueId && "Node in the queue already");
U->NodeQueueId = ++CurQueueId;
Queue.push_back(U);
}
- void remove(SUnit *SU) {
+ void remove(SUnit *SU) override {
assert(!Queue.empty() && "Queue is empty!");
assert(SU->NodeQueueId != 0 && "Not in queue!");
std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
SU);
- if (I != prior(Queue.end()))
+ if (I != std::prev(Queue.end()))
std::swap(*I, Queue.back());
Queue.pop_back();
SU->NodeQueueId = 0;
}
- bool tracksRegPressure() const { return TracksRegPressure; }
+ bool tracksRegPressure() const override { return TracksRegPressure; }
void dumpRegPressure() const;
@@ -1724,9 +1716,9 @@ public:
int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
- void scheduledNode(SUnit *SU);
+ void scheduledNode(SUnit *SU) override;
- void unscheduledNode(SUnit *SU);
+ void unscheduledNode(SUnit *SU) override;
protected:
bool canClobber(const SUnit *SU, const SUnit *Op);
@@ -1738,12 +1730,12 @@ protected:
template<class SF>
static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
std::vector<SUnit *>::iterator Best = Q.begin();
- for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
+ for (std::vector<SUnit *>::iterator I = std::next(Q.begin()),
E = Q.end(); I != E; ++I)
if (Picker(*Best, *I))
Best = I;
SUnit *V = *Best;
- if (Best != prior(Q.end()))
+ if (Best != std::prev(Q.end()))
std::swap(*Best, Q.back());
Q.pop_back();
return V;
@@ -1776,13 +1768,13 @@ public:
tii, tri, tli),
Picker(this) {}
- bool isBottomUp() const { return SF::IsBottomUp; }
+ bool isBottomUp() const override { return SF::IsBottomUp; }
- bool isReady(SUnit *U) const {
+ bool isReady(SUnit *U) const override {
return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
}
- SUnit *pop() {
+ SUnit *pop() override {
if (Queue.empty()) return NULL;
SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 054e3dd840..5639894d09 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -738,13 +738,13 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() ||
// Fast-isel may have inserted some instructions, in which case the
// BB->back().isPHI() test will not fire when we want it to.
- prior(Emitter.getInsertPos())->isPHI()) {
+ std::prev(Emitter.getInsertPos())->isPHI()) {
// Did not insert any instruction.
Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
return;
}
- Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+ Orders.push_back(std::make_pair(Order, std::prev(Emitter.getInsertPos())));
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 2ff37e0a15..5e11dbb5fb 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -117,13 +117,13 @@ namespace llvm {
virtual MachineBasicBlock*
EmitSchedule(MachineBasicBlock::iterator &InsertPos);
- virtual void dumpNode(const SUnit *SU) const;
+ void dumpNode(const SUnit *SU) const override;
void dumpSchedule() const;
- virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+ std::string getGraphNodeLabel(const SUnit *SU) const override;
- virtual std::string getDAGName() const;
+ std::string getDAGName() const override;
virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 58aa1fe0eb..fb861030de 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -80,7 +80,7 @@ public:
delete AvailableQueue;
}
- void Schedule();
+ void Schedule() override;
private:
void releaseSucc(SUnit *SU, const SDep &D);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45d5a4fa69..d11ce80424 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -18,17 +18,15 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/DebugInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
@@ -179,6 +177,22 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
return true;
}
+/// \brief Return true if the specified node is a BUILD_VECTOR node of
+/// all ConstantSDNode or undef.
+bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Op = N->getOperand(i);
+ if (Op.getOpcode() == ISD::UNDEF)
+ continue;
+ if (!isa<ConstantSDNode>(Op))
+ return false;
+ }
+ return true;
+}
+
/// isScalarToVector - Return true if the specified node is a
/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
/// element is not an undef.
@@ -217,6 +231,21 @@ bool ISD::allOperandsUndef(const SDNode *N) {
return true;
}
+ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) {
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ return ISD::ANY_EXTEND;
+ case ISD::SEXTLOAD:
+ return ISD::SIGN_EXTEND;
+ case ISD::ZEXTLOAD:
+ return ISD::ZERO_EXTEND;
+ default:
+ break;
+ }
+
+ llvm_unreachable("Invalid LoadExtType");
+}
+
/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
/// when given the operation for (X op Y).
ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
@@ -369,9 +398,12 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
llvm_unreachable("Should only be used on nodes with operands");
default: break; // Normal nodes don't need extra info.
case ISD::TargetConstant:
- case ISD::Constant:
- ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue());
+ case ISD::Constant: {
+ const ConstantSDNode *C = cast<ConstantSDNode>(N);
+ ID.AddPointer(C->getConstantIntValue());
+ ID.AddBoolean(C->isOpaque());
break;
+ }
case ISD::TargetConstantFP:
case ISD::ConstantFP: {
ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
@@ -869,7 +901,7 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
- : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), TLI(0), OptLevel(OL),
+ : TM(tm), TSI(*tm.getSelectionDAGInfo()), TLI(0), OptLevel(OL),
EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
UpdateListeners(0) {
@@ -877,10 +909,8 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
DbgInfo = new SDDbgInfo();
}
-void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti,
- const TargetLowering *tli) {
+void SelectionDAG::init(MachineFunction &mf, const TargetLowering *tli) {
MF = &mf;
- TTI = tti;
TLI = tli;
Context = &mf.getFunction()->getContext();
}
@@ -956,19 +986,21 @@ SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) {
return getNode(ISD::XOR, DL, VT, Val, NegOne);
}
-SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) {
+SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT, bool isO) {
EVT EltVT = VT.getScalarType();
assert((EltVT.getSizeInBits() >= 64 ||
(uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
"getConstant with a uint64_t value that doesn't fit in the type!");
- return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT);
+ return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT, isO);
}
-SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) {
- return getConstant(*ConstantInt::get(*Context, Val), VT, isT);
+SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT, bool isO)
+{
+ return getConstant(*ConstantInt::get(*Context, Val), VT, isT, isO);
}
-SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
+ bool isO) {
assert(VT.isInteger() && "Cannot create FP integer constant!");
EVT EltVT = VT.getScalarType();
@@ -1010,7 +1042,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {
EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits)
.trunc(ViaEltSizeInBits),
- ViaEltVT, isT));
+ ViaEltVT, isT, isO));
}
// EltParts is currently in little endian order. If we actually want
@@ -1041,6 +1073,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
ID.AddPointer(Elt);
+ ID.AddBoolean(isO);
void *IP = 0;
SDNode *N = NULL;
if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
@@ -1048,7 +1081,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
return SDValue(N, 0);
if (!N) {
- N = new (NodeAllocator) ConstantSDNode(isT, Elt, EltVT);
+ N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, EltVT);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
}
@@ -1139,7 +1172,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
if (!GVar) {
// If GV is an alias then use the aliasee for determining thread-localness.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasedGlobal());
}
unsigned Opc;
@@ -2502,17 +2535,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
EVT VT, SDValue Operand) {
- // Constant fold unary operations with an integer constant operand.
+ // Constant fold unary operations with an integer constant operand. Even
+ // opaque constant will be folded, because the folding of unary operations
+ // doesn't create new constants with different values. Nevertheless, the
+ // opaque flag is preserved during folding to prevent future folding with
+ // other constants.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
const APInt &Val = C->getAPIntValue();
switch (Opcode) {
default: break;
case ISD::SIGN_EXTEND:
- return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT);
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT,
+ C->isTargetOpcode(), C->isOpaque());
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::TRUNCATE:
- return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT,
+ C->isTargetOpcode(), C->isOpaque());
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
APFloat apf(EVTToAPFloatSemantics(VT),
@@ -2529,15 +2568,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT);
break;
case ISD::BSWAP:
- return getConstant(Val.byteSwap(), VT);
+ return getConstant(Val.byteSwap(), VT, C->isTargetOpcode(),
+ C->isOpaque());
case ISD::CTPOP:
- return getConstant(Val.countPopulation(), VT);
+ return getConstant(Val.countPopulation(), VT, C->isTargetOpcode(),
+ C->isOpaque());
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
- return getConstant(Val.countLeadingZeros(), VT);
+ return getConstant(Val.countLeadingZeros(), VT, C->isTargetOpcode(),
+ C->isOpaque());
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- return getConstant(Val.countTrailingZeros(), VT);
+ return getConstant(Val.countTrailingZeros(), VT, C->isTargetOpcode(),
+ C->isOpaque());
}
}
@@ -2774,10 +2817,13 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1);
ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2);
- if (Scalar1 && Scalar2) {
+ if (Scalar1 && Scalar2 && (Scalar1->isOpaque() || Scalar2->isOpaque()))
+ return SDValue();
+
+ if (Scalar1 && Scalar2)
// Scalar instruction.
Inputs.push_back(std::make_pair(Scalar1, Scalar2));
- } else {
+ else {
// For vectors extract each constant element into Inputs so we can constant
// fold them individually.
BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
@@ -2793,6 +2839,9 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
if (!V1 || !V2) // Not a constant, bail.
return SDValue();
+ if (V1->isOpaque() || V2->isOpaque())
+ return SDValue();
+
// Avoid BUILD_VECTOR nodes that perform implicit truncation.
// FIXME: This is valid and could be handled by truncating the APInts.
if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
@@ -3546,10 +3595,10 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG,
Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
}
- // If the "cost" of materializing the integer immediate is 1 or free, then
- // it is cost effective to turn the load into the immediate.
- const TargetTransformInfo *TTI = DAG.getTargetTransformInfo();
- if (TTI->getIntImmCost(Val, VT.getTypeForEVT(*DAG.getContext())) < 2)
+ // If the "cost" of materializing the integer immediate is less than the cost
+ // of a load, then it is cost effective to turn the load into the immediate.
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+ if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty))
return DAG.getConstant(Val, VT);
return SDValue(0, 0);
}
@@ -3609,8 +3658,9 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
DAG.getMachineFunction());
if (VT == MVT::Other) {
- if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() ||
- TLI.allowsUnalignedMemoryAccesses(VT)) {
+ unsigned AS = 0;
+ if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) ||
+ TLI.allowsUnalignedMemoryAccesses(VT, AS)) {
VT = TLI.getPointerTy();
} else {
switch (DstAlign & 7) {
@@ -3667,9 +3717,10 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
// FIXME: Only does this for 64-bit or more since we don't have proper
// cost model for unaligned load / store.
bool Fast;
+ unsigned AS = 0;
if (NumMemOps && AllowOverlap &&
VTSize >= 8 && NewVTSize < Size &&
- TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast)
+ TLI.allowsUnalignedMemoryAccesses(VT, AS, &Fast) && Fast)
VTSize = Size;
else {
VT = NewVT;
@@ -4182,9 +4233,10 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
- SDVTList VTList, SDValue* Ops, unsigned NumOps,
+ SDVTList VTList, SDValue *Ops, unsigned NumOps,
MachineMemOperand *MMO,
- AtomicOrdering Ordering,
+ AtomicOrdering SuccessOrdering,
+ AtomicOrdering FailureOrdering,
SynchronizationScope SynchScope) {
FoldingSetNodeID ID;
ID.AddInteger(MemVT.getRawBits());
@@ -4206,17 +4258,28 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(),
dl.getDebugLoc(), VTList, MemVT,
Ops, DynOps, NumOps, MMO,
- Ordering, SynchScope);
+ SuccessOrdering, FailureOrdering,
+ SynchScope);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
+ SDVTList VTList, SDValue *Ops, unsigned NumOps,
+ MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ return getAtomic(Opcode, dl, MemVT, VTList, Ops, NumOps, MMO, Ordering,
+ Ordering, SynchScope);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
SDValue Chain, SDValue Ptr, SDValue Cmp,
SDValue Swp, MachinePointerInfo PtrInfo,
unsigned Alignment,
- AtomicOrdering Ordering,
+ AtomicOrdering SuccessOrdering,
+ AtomicOrdering FailureOrdering,
SynchronizationScope SynchScope) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
@@ -4237,14 +4300,15 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO,
- Ordering, SynchScope);
+ SuccessOrdering, FailureOrdering, SynchScope);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
SDValue Chain,
SDValue Ptr, SDValue Cmp,
SDValue Swp, MachineMemOperand *MMO,
- AtomicOrdering Ordering,
+ AtomicOrdering SuccessOrdering,
+ AtomicOrdering FailureOrdering,
SynchronizationScope SynchScope) {
assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
@@ -4253,7 +4317,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
- return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, Ordering, SynchScope);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, SuccessOrdering,
+ FailureOrdering, SynchScope);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
@@ -5633,7 +5698,7 @@ class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
SDNode::use_iterator &UI;
SDNode::use_iterator &UE;
- virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ void NodeDeleted(SDNode *N, SDNode *E) override {
// Increment the iterator as needed.
while (UI != UE && N == *UI)
++UI;
@@ -6457,7 +6522,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
unsigned &SplatBitSize,
bool &HasAnyUndefs,
unsigned MinSplatBits,
- bool isBigEndian) {
+ bool isBigEndian) const {
EVT VT = getValueType(0);
assert(VT.isVector() && "Expected a vector type");
unsigned sz = VT.getSizeInBits();
@@ -6518,6 +6583,27 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
return true;
}
+ConstantSDNode *BuildVectorSDNode::getConstantSplatValue() const {
+ SDValue Op0 = getOperand(0);
+ if (Op0.getOpcode() != ISD::Constant)
+ return nullptr;
+
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i) != Op0)
+ return nullptr;
+
+ return cast<ConstantSDNode>(Op0);
+}
+
+bool BuildVectorSDNode::isConstant() const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ unsigned Opc = getOperand(i).getOpcode();
+ if (Opc != ISD::UNDEF && Opc != ISD::Constant && Opc != ISD::ConstantFP)
+ return false;
+ }
+ return true;
+}
+
bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
// Find the first non-undef value in the shuffle mask.
unsigned i, e;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 2b2713d248..4a6e5cf036 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -34,10 +34,10 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/StackMaps.h"
-#include "llvm/DebugInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
@@ -214,6 +214,20 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
llvm_unreachable("Unknown mismatch!");
}
+static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
+ const Twine &ErrMsg) {
+ const Instruction *I = dyn_cast_or_null<Instruction>(V);
+ if (!V)
+ return Ctx.emitError(ErrMsg);
+
+ const char *AsmError = ", possible invalid constraint for vector type";
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ return Ctx.emitError(I, ErrMsg + AsmError);
+
+ return Ctx.emitError(I, ErrMsg);
+}
+
/// getCopyFromPartsVector - Create a value that contains the specified legal
/// parts combined into the value they represent. If the parts combine to a
/// type larger then ValueVT then AssertOp can be used to specify whether the
@@ -306,16 +320,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
// Handle cases such as i8 -> <1 x i1>
if (ValueVT.getVectorNumElements() != 1) {
- LLVMContext &Ctx = *DAG.getContext();
- Twine ErrMsg("non-trivial scalar-to-vector conversion");
- if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
- if (const CallInst *CI = dyn_cast<CallInst>(I))
- if (isa<InlineAsm>(CI->getCalledValue()))
- ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
- Ctx.emitError(I, ErrMsg);
- } else {
- Ctx.emitError(ErrMsg);
- }
+ diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
+ "non-trivial scalar-to-vector conversion");
return DAG.getUNDEF(ValueVT);
}
@@ -397,18 +403,9 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
"Failed to tile the value with PartVT!");
if (NumParts == 1) {
- if (PartEVT != ValueVT) {
- LLVMContext &Ctx = *DAG.getContext();
- Twine ErrMsg("scalar-to-vector conversion failed");
- if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
- if (const CallInst *CI = dyn_cast<CallInst>(I))
- if (isa<InlineAsm>(CI->getCalledValue()))
- ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
- Ctx.emitError(I, ErrMsg);
- } else {
- Ctx.emitError(ErrMsg);
- }
- }
+ if (PartEVT != ValueVT)
+ diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
+ "scalar-to-vector conversion failed");
Parts[0] = Val;
return;
@@ -627,16 +624,6 @@ namespace {
}
}
- /// areValueTypesLegal - Return true if types of all the values are legal.
- bool areValueTypesLegal(const TargetLowering &TLI) {
- for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- MVT RegisterVT = RegVTs[Value];
- if (!TLI.isTypeLegal(RegisterVT))
- return false;
- }
- return true;
- }
-
/// append - Add the specified values to this one.
void append(const RegsForValue &RHS) {
ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
@@ -851,12 +838,20 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
Ops.push_back(Res);
+ unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
MVT RegisterVT = RegVTs[Value];
for (unsigned i = 0; i != NumRegs; ++i) {
assert(Reg < Regs.size() && "Mismatch in # registers expected");
- Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+ unsigned TheReg = Regs[Reg++];
+ Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
+
+ if (TheReg == SP && Code == InlineAsm::Kind_Clobber) {
+ // If we clobbered the stack pointer, MFI should know about it.
+ assert(DAG.getMachineFunction().getFrameInfo()->
+ hasInlineAsmWithSPAdjust());
+ }
}
}
}
@@ -866,7 +861,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
AA = &aa;
GFI = gfi;
LibInfo = li;
- TD = DAG.getTarget().getDataLayout();
+ DL = DAG.getTarget().getDataLayout();
Context = DAG.getContext();
LPadToCallSiteMap.clear();
}
@@ -884,6 +879,7 @@ void SelectionDAGBuilder::clear() {
PendingExports.clear();
CurInst = NULL;
HasTailCall = false;
+ SDNodeOrder = LowestSDNodeOrder;
}
/// clearDanglingDebugInfo - Clear the dangling debug information
@@ -1384,7 +1380,9 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
- MachineBasicBlock *SwitchBB) {
+ MachineBasicBlock *SwitchBB,
+ uint32_t TWeight,
+ uint32_t FWeight) {
const BasicBlock *BB = CurBB->getBasicBlock();
// If the leaf of the tree is a comparison, merge the condition into
@@ -1409,7 +1407,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
}
CaseBlock CB(Condition, BOp->getOperand(0),
- BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+ BOp->getOperand(1), NULL, TBB, FBB, CurBB, TWeight, FWeight);
SwitchCases.push_back(CB);
return;
}
@@ -1417,17 +1415,26 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
// Create a CaseBlock record representing this branch.
CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
- NULL, TBB, FBB, CurBB);
+ NULL, TBB, FBB, CurBB, TWeight, FWeight);
SwitchCases.push_back(CB);
}
+/// Scale down both weights to fit into uint32_t.
+static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
+ uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
+ uint32_t Scale = (NewMax / UINT32_MAX) + 1;
+ NewTrue = NewTrue / Scale;
+ NewFalse = NewFalse / Scale;
+}
+
/// FindMergedConditions - If Cond is an expression like
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- unsigned Opc) {
+ unsigned Opc, uint32_t TWeight,
+ uint32_t FWeight) {
// If this node is not part of the or/and tree, emit it as a branch.
const Instruction *BOp = dyn_cast<Instruction>(Cond);
if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
@@ -1435,7 +1442,8 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
BOp->getParent() != CurBB->getBasicBlock() ||
!InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
!InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
- EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB);
+ EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
+ TWeight, FWeight);
return;
}
@@ -1447,6 +1455,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
if (Opc == Instruction::Or) {
// Codegen X | Y as:
+ // BB1:
// jmp_if_X TBB
// jmp TmpBB
// TmpBB:
@@ -1454,14 +1463,34 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
// jmp FBB
//
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+ // = TrueProb for orignal BB.
+ // Assuming the orignal weights are A and B, one choice is to set BB1's
+ // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
+ // assumes that
+ // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+ // Another choice is to assume TrueProb for BB1 equals to TrueProb for
+ // TmpBB, but the math is more complicated.
+
+ uint64_t NewTrueWeight = TWeight;
+ uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight;
+ ScaleWeights(NewTrueWeight, NewFalseWeight);
// Emit the LHS condition.
- FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc);
+ FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
+ NewTrueWeight, NewFalseWeight);
+ NewTrueWeight = TWeight;
+ NewFalseWeight = 2 * (uint64_t)FWeight;
+ ScaleWeights(NewTrueWeight, NewFalseWeight);
// Emit the RHS condition into TmpBB.
- FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
+ NewTrueWeight, NewFalseWeight);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
+ // BB1:
// jmp_if_X TmpBB
// jmp FBB
// TmpBB:
@@ -1470,11 +1499,28 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
//
// This requires creation of TmpBB after CurBB.
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+ // = FalseProb for orignal BB.
+ // Assuming the orignal weights are A and B, one choice is to set BB1's
+ // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
+ // assumes that
+ // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
+
+ uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight;
+ uint64_t NewFalseWeight = FWeight;
+ ScaleWeights(NewTrueWeight, NewFalseWeight);
// Emit the LHS condition.
- FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc);
+ FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
+ NewTrueWeight, NewFalseWeight);
+ NewTrueWeight = 2 * (uint64_t)TWeight;
+ NewFalseWeight = FWeight;
+ ScaleWeights(NewTrueWeight, NewFalseWeight);
// Emit the RHS condition into TmpBB.
- FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
+ NewTrueWeight, NewFalseWeight);
}
}
@@ -1525,8 +1571,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// Update machine-CFG edges.
BrMBB->addSuccessor(Succ0MBB);
- // If this is not a fall-through branch, emit the branch.
- if (Succ0MBB != NextBlock)
+ // If this is not a fall-through branch or optimizations are switched off,
+ // emit the branch.
+ if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None)
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
MVT::Other, getControlRoot(),
DAG.getBasicBlock(Succ0MBB)));
@@ -1561,7 +1608,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
(BOp->getOpcode() == Instruction::And ||
BOp->getOpcode() == Instruction::Or)) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
- BOp->getOpcode());
+ BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB),
+ getEdgeWeight(BrMBB, Succ1MBB));
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
@@ -2351,7 +2399,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
volatile double RDensity =
(double)RSize.roundToDouble() /
(Last - RBegin + 1ULL).roundToDouble();
- double Metric = Range.logBase2()*(LDensity+RDensity);
+ volatile double Metric = Range.logBase2()*(LDensity+RDensity);
// Should always split in some non-trivial place
DEBUG(dbgs() <<"=>Step\n"
<< "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
@@ -2590,7 +2638,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
if (Cases.size() >= 2)
// Must recompute end() each iteration because it may be
// invalidated by erase if we hold on to it
- for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+ for (CaseItr I = Cases.begin(), J = std::next(Cases.begin());
J != Cases.end(); ) {
const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
@@ -2936,6 +2984,13 @@ void SelectionDAGBuilder::visitBitCast(const User &I) {
if (DestVT != N.getValueType())
setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(),
DestVT, N)); // convert types.
+ // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
+ // might fold any kind of constant expression to an integer constant and that
+ // is not what we are looking for. Only regcognize a bitcast of a genuine
+ // constant integer as an opaque constant.
+ else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
+ setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false,
+ /*isOpaque*/true));
else
setValue(&I, N); // noop cast.
}
@@ -3261,7 +3316,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
if (Field) {
// N = N + Offset
- uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+ uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
DAG.getConstant(Offset, N.getValueType()));
}
@@ -3275,7 +3330,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
uint64_t Offs =
- TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
SDValue OffsVal;
EVT PTy = TLI->getPointerTy(AS);
unsigned PtrBits = PTy.getSizeInBits();
@@ -3292,7 +3347,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// N = N + Idx * ElementSize;
APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS),
- TD->getTypeAllocSize(Ty));
+ DL->getTypeAllocSize(Ty));
SDValue IdxN = getValue(Idx);
// If the index is smaller or larger than intptr_t, truncate or extend
@@ -3370,9 +3425,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
setValue(&I, DSA);
DAG.setRoot(DSA.getValue(1));
- // Inform the Frame Information that we have just allocated a variable-sized
- // object.
- FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
+ assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects());
}
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
@@ -3400,7 +3453,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
SDValue Root;
bool ConstantMemory = false;
- if (I.isVolatile() || NumValues > MaxParallelChains)
+ if (isVolatile || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
else if (AA->pointsToConstantMemory(
@@ -3413,6 +3466,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Root = DAG.getRoot();
}
+ const TargetLowering *TLI = TM.getTargetLowering();
+ if (isVolatile)
+ Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG);
+
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
NumValues));
@@ -3536,14 +3593,15 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();
- AtomicOrdering Order = I.getOrdering();
+ AtomicOrdering SuccessOrder = I.getSuccessOrdering();
+ AtomicOrdering FailureOrder = I.getFailureOrdering();
SynchronizationScope Scope = I.getSynchScope();
SDValue InChain = getRoot();
const TargetLowering *TLI = TM.getTargetLowering();
if (TLI->getInsertFencesForAtomic())
- InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+ InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl,
DAG, *TLI);
SDValue L =
@@ -3554,13 +3612,14 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
getValue(I.getCompareOperand()),
getValue(I.getNewValOperand()),
MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
- TLI->getInsertFencesForAtomic() ? Monotonic : Order,
+ TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder,
+ TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder,
Scope);
SDValue OutChain = L.getValue(1);
if (TLI->getInsertFencesForAtomic())
- OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl,
DAG, *TLI);
setValue(&I, L);
@@ -3637,6 +3696,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
if (I.getAlignment() < VT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
+ InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue L =
DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
getValue(I.getPointerOperand()),
@@ -5283,7 +5343,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
SmallVector<Value *, 4> Allocas;
- GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD);
+ GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL);
for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(),
E = Allocas.end(); Object != E; ++Object) {
@@ -5324,6 +5384,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
(void)getControlRoot();
return 0;
}
+ case Intrinsic::clear_cache:
+ return TLI->getClearCacheBuiltinName();
case Intrinsic::donothing:
// ignore
return 0;
@@ -5366,6 +5428,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
int DemoteStackIdx = -100;
if (!CanLowerReturn) {
+ assert(!CS.hasInAllocaArgument() &&
+ "sret demotion is incompatible with inalloca");
uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(
FTy->getReturnType());
unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(
@@ -5508,9 +5572,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
/// value is equal or not-equal to zero.
static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
- for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
- UI != E; ++UI) {
- if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ for (const User *U : V->users()) {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
if (IC->isEquality())
if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
if (C->isNullValue())
@@ -5534,7 +5597,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
if (const Constant *LoadCst =
ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
- Builder.TD))
+ Builder.DL))
return Builder.getValue(LoadCst);
}
@@ -5653,9 +5716,13 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
// bloat the code.
const TargetLowering *TLI = TM.getTargetLowering();
if (ActuallyDoIt && CSize->getZExtValue() > 4) {
+ unsigned DstAS = LHS->getType()->getPointerAddressSpace();
+ unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
- if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT))
+ if (!TLI->isTypeLegal(LoadVT) ||
+ !TLI->allowsUnalignedMemoryAccesses(LoadVT, SrcAS) ||
+ !TLI->allowsUnalignedMemoryAccesses(LoadVT, DstAS))
ActuallyDoIt = false;
}
@@ -6026,7 +6093,7 @@ public:
/// MVT::Other.
EVT getCallOperandValEVT(LLVMContext &Context,
const TargetLowering &TLI,
- const DataLayout *TD) const {
+ const DataLayout *DL) const {
if (CallOperandVal == 0) return MVT::Other;
if (isa<BasicBlock>(CallOperandVal))
@@ -6052,7 +6119,7 @@ public:
// If OpTy is not a single value, it may be a struct/union that we
// can tile with integers.
if (!OpTy->isSingleValueType() && OpTy->isSized()) {
- unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ unsigned BitSize = DL->getTypeSizeInBits(OpTy);
switch (BitSize) {
default: break;
case 1:
@@ -6108,7 +6175,7 @@ static void GetRegistersForValue(SelectionDAG &DAG,
// types are identical size, use a bitcast to convert (e.g. two differing
// vector types).
MVT RegVT = *PhysReg.second->vt_begin();
- if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+ if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) {
OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
@@ -6241,7 +6308,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
- OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, TD).
+ OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, DL).
getSimpleVT();
}
@@ -6716,11 +6783,11 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
const TargetLowering *TLI = TM.getTargetLowering();
- const DataLayout &TD = *TLI->getDataLayout();
+ const DataLayout &DL = *TLI->getDataLayout();
SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(),
getRoot(), getValue(I.getOperand(0)),
DAG.getSrcValue(I.getOperand(0)),
- TD.getABITypeAlignment(I.getType()));
+ DL.getABITypeAlignment(I.getType()));
setValue(&I, V);
DAG.setRoot(V.getValue(1));
}
@@ -6781,6 +6848,42 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
return TLI->LowerCallTo(CLI);
}
+/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
+/// or patchpoint target node's operand list.
+///
+/// Constants are converted to TargetConstants purely as an optimization to
+/// avoid constant materialization and register allocation.
+///
+/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
+/// generate addess computation nodes, and so ExpandISelPseudo can convert the
+/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
+/// address materialization and register allocation, but may also be required
+/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
+/// alloca in the entry block, then the runtime may assume that the alloca's
+/// StackMap location can be read immediately after compilation and that the
+/// location is valid at any point during execution (this is similar to the
+/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
+/// only available in a register, then the runtime would need to trap when
+/// execution reaches the StackMap in order to read the alloca's location.
+static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx,
+ SmallVectorImpl<SDValue> &Ops,
+ SelectionDAGBuilder &Builder) {
+ for (unsigned i = StartIdx, e = CI.getNumArgOperands(); i != e; ++i) {
+ SDValue OpVal = Builder.getValue(CI.getArgOperand(i));
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64));
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
+ const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
+ Ops.push_back(
+ Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy()));
+ } else
+ Ops.push_back(OpVal);
+ }
+}
+
/// \brief Lower llvm.experimental.stackmap directly to its target opcode.
void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
// void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>,
@@ -6788,61 +6891,64 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
- SDValue Callee = getValue(CI.getCalledValue());
+ SDValue Chain, InFlag, Callee, NullPtr;
+ SmallVector<SDValue, 32> Ops;
- // Lower into a call sequence with no args and no return value.
- std::pair<SDValue, SDValue> Result = LowerCallOperands(CI, 0, 0, Callee);
- // Set the root to the target-lowered call chain.
- SDValue Chain = Result.second;
- DAG.setRoot(Chain);
+ SDLoc DL = getCurSDLoc();
+ Callee = getValue(CI.getCalledValue());
+ NullPtr = DAG.getIntPtrConstant(0, true);
- /// Get a call instruction from the call sequence chain.
- /// Tail calls are not allowed.
- SDNode *CallEnd = Chain.getNode();
- assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
- "Expected a callseq node.");
- SDNode *Call = CallEnd->getOperand(0).getNode();
- bool hasGlue = Call->getGluedNode();
+ // The stackmap intrinsic only records the live variables (the arguemnts
+ // passed to it) and emits NOPS (if requested). Unlike the patchpoint
+ // intrinsic, this won't be lowered to a function call. This means we don't
+ // have to worry about calling conventions and target specific lowering code.
+ // Instead we perform the call lowering right here.
+ //
+ // chain, flag = CALLSEQ_START(chain, 0)
+ // chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
+ // chain, flag = CALLSEQ_END(chain, 0, 0, flag)
+ //
+ Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL);
+ InFlag = Chain.getValue(1);
- // Replace the target specific call node with the stackmap intrinsic.
- SmallVector<SDValue, 8> Ops;
+ // Add the <id> and <numBytes> constants.
+ SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64));
+ SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32));
- // Add the <id> and <numShadowBytes> constants.
- for (unsigned i = 0; i < 2; ++i) {
- SDValue tmp = getValue(CI.getOperand(i));
- Ops.push_back(DAG.getTargetConstant(
- cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32));
- }
// Push live variables for the stack map.
- for (unsigned i = 2, e = CI.getNumArgOperands(); i != e; ++i)
- Ops.push_back(getValue(CI.getArgOperand(i)));
+ addStackMapLiveVars(CI, 2, Ops, *this);
- // Push the chain (this is originally the first operand of the call, but
- // becomes now the last or second to last operand).
- Ops.push_back(*(Call->op_begin()));
+ // We are not pushing any register mask info here on the operands list,
+ // because the stackmap doesn't clobber anything.
- // Push the glue flag (last operand).
- if (hasGlue)
- Ops.push_back(*(Call->op_end()-1));
+ // Push the chain and the glue flag.
+ Ops.push_back(Chain);
+ Ops.push_back(InFlag);
+ // Create the STACKMAP node.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops);
+ Chain = SDValue(SM, 0);
+ InFlag = Chain.getValue(1);
- // Replace the target specific call node with a STACKMAP node.
- MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::STACKMAP, getCurSDLoc(),
- NodeTys, Ops);
+ Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
- // StackMap generates no value, so nothing goes in the NodeMap.
+ // Stackmaps don't generate values, so nothing goes into the NodeMap.
- // Fixup the consumers of the intrinsic. The chain and glue may be used in the
- // call sequence.
- DAG.ReplaceAllUsesWith(Call, MN);
+ // Set the root to the target-lowered call chain.
+ DAG.setRoot(Chain);
- DAG.DeleteNode(Call);
+ // Inform the Frame Information that we have a stackmap in this function.
+ FuncInfo.MF->getFrameInfo()->setHasStackMap();
}
/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
- // void|i64 @llvm.experimental.patchpoint.void|i64(i32 <id>,
+ // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
// i32 <numBytes>,
// i8* <target>,
// i32 <numArgs>,
@@ -6855,17 +6961,19 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
SDValue Callee = getValue(CI.getOperand(2)); // <target>
// Get the real number of arguments participating in the call <numArgs>
- unsigned NumArgs =
- cast<ConstantSDNode>(getValue(CI.getArgOperand(3)))->getZExtValue();
+ SDValue NArgVal = getValue(CI.getArgOperand(PatchPointOpers::NArgPos));
+ unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
// Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
- assert(CI.getNumArgOperands() >= NumArgs + 4 &&
+ // Intrinsics include all meta-operands up to but not including CC.
+ unsigned NumMetaOpers = PatchPointOpers::CCPos;
+ assert(CI.getNumArgOperands() >= NumMetaOpers + NumArgs &&
"Not enough arguments provided to the patchpoint intrinsic");
// For AnyRegCC the arguments are lowered later on manually.
unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs;
std::pair<SDValue, SDValue> Result =
- LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC);
+ LowerCallOperands(CI, NumMetaOpers, NumCallArgs, Callee, isAnyRegCC);
// Set the root to the target-lowered call chain.
SDValue Chain = Result.second;
@@ -6885,13 +6993,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
// Replace the target specific call node with the patchable intrinsic.
SmallVector<SDValue, 8> Ops;
- // Add the <id> and <numNopBytes> constants.
- for (unsigned i = 0; i < 2; ++i) {
- SDValue tmp = getValue(CI.getOperand(i));
- Ops.push_back(DAG.getTargetConstant(
- cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32));
- }
+ // Add the <id> and <numBytes> constants.
+ SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64));
+ SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32));
+
// Assume that the Callee is a constant address.
+ // FIXME: handle function symbols in the future.
Ops.push_back(
DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(),
/*isTarget=*/true));
@@ -6909,25 +7020,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
// Add the arguments we omitted previously. The register allocator should
// place these in any free register.
if (isAnyRegCC)
- for (unsigned i = 4, e = NumArgs + 4; i != e; ++i)
+ for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
Ops.push_back(getValue(CI.getArgOperand(i)));
- // Push the arguments from the call instruction.
+ // Push the arguments from the call instruction up to the register mask.
SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1;
for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i)
Ops.push_back(*i);
// Push live variables for the stack map.
- for (unsigned i = NumArgs + 4, e = CI.getNumArgOperands(); i != e; ++i) {
- SDValue OpVal = getValue(CI.getArgOperand(i));
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
- Ops.push_back(
- DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
- Ops.push_back(
- DAG.getTargetConstant(C->getSExtValue(), MVT::i64));
- } else
- Ops.push_back(OpVal);
- }
+ addStackMapLiveVars(CI, NumMetaOpers + NumArgs, Ops, *this);
// Push the register mask info.
if (hasGlue)
@@ -6981,6 +7083,9 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
} else
DAG.ReplaceAllUsesWith(Call, MN);
DAG.DeleteNode(Call);
+
+ // Inform the Frame Information that we have a patchpoint in this function.
+ FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
}
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
@@ -7037,8 +7142,18 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setInReg();
if (Args[i].isSRet)
Flags.setSRet();
- if (Args[i].isByVal) {
+ if (Args[i].isByVal)
Flags.setByVal();
+ if (Args[i].isInAlloca) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling
+ // in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Args[i].isByVal || Args[i].isInAlloca) {
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy));
@@ -7202,12 +7317,10 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
return A->use_empty();
const BasicBlock *Entry = A->getParent()->begin();
- for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
- UI != E; ++UI) {
- const User *U = *UI;
+ for (const User *U : A->users())
if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
return false; // Use not in entry block.
- }
+
return true;
}
@@ -7215,7 +7328,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
SelectionDAG &DAG = SDB->DAG;
SDLoc dl = SDB->getCurSDLoc();
const TargetLowering *TLI = getTargetLowering();
- const DataLayout *TD = TLI->getDataLayout();
+ const DataLayout *DL = TLI->getDataLayout();
SmallVector<ISD::InputArg, 16> Ins;
if (!FuncInfo->CanLowerReturn) {
@@ -7247,7 +7360,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
ISD::ArgFlagsTy Flags;
unsigned OriginalAlignment =
- TD->getABITypeAlignment(ArgTy);
+ DL->getABITypeAlignment(ArgTy);
if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
Flags.setZExt();
@@ -7257,11 +7370,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setInReg();
if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
Flags.setSRet();
- if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) {
+ if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
Flags.setByVal();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling
+ // in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Flags.isByVal() || Flags.isInAlloca()) {
PointerType *Ty = cast<PointerType>(I->getType());
Type *ElementTy = Ty->getElementType();
- Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
+ Flags.setByValSize(DL->getTypeAllocSize(ElementTy));
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
unsigned FrameAlign;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 835f643cc1..66835bf557 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -18,9 +18,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include <vector>
@@ -57,6 +56,7 @@ class MachineBasicBlock;
class MachineInstr;
class MachineRegisterInfo;
class MDNode;
+class MVT;
class PHINode;
class PtrToIntInst;
class ReturnInst;
@@ -488,8 +488,12 @@ private:
private:
const TargetMachine &TM;
public:
+ /// Lowest valid SDNodeOrder. The special case 0 is reserved for scheduling
+ /// nodes without a corresponding SDNode.
+ static const unsigned LowestSDNodeOrder = 1;
+
SelectionDAG &DAG;
- const DataLayout *TD;
+ const DataLayout *DL;
AliasAnalysis *AA;
const TargetLibraryInfo *LibInfo;
@@ -534,7 +538,7 @@ public:
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
CodeGenOpt::Level ol)
- : CurInst(NULL), SDNodeOrder(0), TM(dag.getTarget()),
+ : CurInst(NULL), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()),
DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
HasTailCall(false) {
}
@@ -608,11 +612,13 @@ public:
void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
- MachineBasicBlock *SwitchBB, unsigned Opc);
+ MachineBasicBlock *SwitchBB, unsigned Opc,
+ uint32_t TW, uint32_t FW);
void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
- MachineBasicBlock *SwitchBB);
+ MachineBasicBlock *SwitchBB,
+ uint32_t TW, uint32_t FW);
bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
void CopyToExportRegsIfNeeded(const Value *V);
@@ -627,7 +633,7 @@ public:
bool useVoidTy = false);
/// UpdateSplitBlock - When an MBB was split during scheduling, update the
- /// references that ned to refer to the last resulting block.
+ /// references that need to refer to the last resulting block.
void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
private:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index c04a08d57f..535febae0f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -14,11 +14,10 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
@@ -82,7 +81,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::VALUETYPE: return "ValueType";
case ISD::Register: return "Register";
case ISD::RegisterMask: return "RegisterMask";
- case ISD::Constant: return "Constant";
+ case ISD::Constant:
+ if (cast<ConstantSDNode>(this)->isOpaque())
+ return "OpaqueConstant";
+ return "Constant";
case ISD::ConstantFP: return "ConstantFP";
case ISD::GlobalAddress: return "GlobalAddress";
case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
@@ -112,7 +114,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
}
case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
- case ISD::TargetConstant: return "TargetConstant";
+ case ISD::TargetConstant:
+ if (cast<ConstantSDNode>(this)->isOpaque())
+ return "OpaqueTargetConstant";
+ return "TargetConstant";
case ISD::TargetConstantFP: return "TargetConstantFP";
case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
@@ -352,7 +357,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
e = MN->memoperands_end(); i != e; ++i) {
OS << **i;
- if (llvm::next(i) != e)
+ if (std::next(i) != e)
OS << " ";
}
OS << ">";
@@ -385,7 +390,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
dyn_cast<GlobalAddressSDNode>(this)) {
int64_t offset = GADN->getOffset();
OS << '<';
- WriteAsOperand(OS, GADN->getGlobal());
+ GADN->getGlobal()->printAsOperand(OS);
OS << '>';
if (offset > 0)
OS << " + " << offset;
@@ -476,9 +481,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
dyn_cast<BlockAddressSDNode>(this)) {
int64_t offset = BA->getOffset();
OS << "<";
- WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
+ BA->getBlockAddress()->getFunction()->printAsOperand(OS, false);
OS << ", ";
- WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
+ BA->getBlockAddress()->getBasicBlock()->printAsOperand(OS, false);
OS << ">";
if (offset > 0)
OS << " + " << offset;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 3a0cfa16ae..5d0e2b937d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -20,7 +20,6 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -33,8 +32,8 @@
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/DebugInfo.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
@@ -213,7 +212,7 @@ MachinePassRegistry RegisterScheduler::Registry;
static cl::opt<RegisterScheduler::FunctionPassCtor, false,
RegisterPassParser<RegisterScheduler> >
ISHeuristic("pre-RA-sched",
- cl::init(&createDefaultScheduler),
+ cl::init(&createDefaultScheduler), cl::Hidden,
cl::desc("Instruction schedulers available (before register"
" allocation):"));
@@ -400,7 +399,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
RegInfo = &MF->getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
LibInfo = &getAnalysis<TargetLibraryInfo>();
- TTI = getAnalysisIfAvailable<TargetTransformInfo>();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
TargetSubtargetInfo &ST =
@@ -418,8 +416,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
- CurDAG->init(*MF, TTI, TLI);
- FuncInfo->set(Fn, *MF);
+ CurDAG->init(*MF, TLI);
+ FuncInfo->set(Fn, *MF, CurDAG);
if (UseMBPI && OptLevel != CodeGenOpt::None)
FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>();
@@ -428,7 +426,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
SDB->init(GFI, *AA, LibInfo);
- MF->setHasMSInlineAsm(false);
+ MF->setHasInlineAsm(false);
+
SelectAllBasicBlocks(Fn);
// If the first basic block in the function has live ins that need to be
@@ -448,7 +447,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
bool hasFI = MI->getOperand(0).isFI();
- unsigned Reg = hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg();
+ unsigned Reg =
+ hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg))
EntryMBB->insert(EntryMBB->begin(), MI);
else {
@@ -456,7 +456,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
if (Def) {
MachineBasicBlock::iterator InsertPos = Def;
// FIXME: VR def may not be in entry block.
- Def->getParent()->insert(llvm::next(InsertPos), MI);
+ Def->getParent()->insert(std::next(InsertPos), MI);
} else
DEBUG(dbgs() << "Dropping debug info for dead vreg"
<< TargetRegisterInfo::virtReg2Index(Reg) << "\n");
@@ -483,9 +483,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// that COPY instructions also need DBG_VALUE, if it is the only
// user of LDI->second.
MachineInstr *CopyUseMI = NULL;
- for (MachineRegisterInfo::use_iterator
- UI = RegInfo->use_begin(LDI->second);
- MachineInstr *UseMI = UI.skipInstruction();) {
+ for (MachineRegisterInfo::use_instr_iterator
+ UI = RegInfo->use_instr_begin(LDI->second),
+ E = RegInfo->use_instr_end(); UI != E; ) {
+ MachineInstr *UseMI = &*(UI++);
if (UseMI->isDebugValue()) continue;
if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
CopyUseMI = UseMI; continue;
@@ -511,7 +512,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
++I) {
- if (MFI->hasCalls() && MF->hasMSInlineAsm())
+ if (MFI->hasCalls() && MF->hasInlineAsm())
break;
const MachineBasicBlock *MBB = I;
@@ -522,8 +523,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
II->isStackAligningInlineAsm()) {
MFI->setHasCalls(true);
}
- if (II->isMSInlineAsm()) {
- MF->setHasMSInlineAsm(true);
+ if (II->isInlineAsm()) {
+ MF->setHasInlineAsm(true);
}
}
}
@@ -563,6 +564,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// at this point.
FuncInfo->clear();
+ DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n");
+ DEBUG(MF->print(dbgs()));
+
return true;
}
@@ -800,7 +804,7 @@ public:
/// NodeDeleted - Handle nodes deleted from the graph. If the node being
/// deleted is the current ISelPosition node, update ISelPosition.
///
- virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ void NodeDeleted(SDNode *N, SDNode *E) override {
if (ISelPosition == SelectionDAG::allnodes_iterator(N))
++ISelPosition;
}
@@ -1063,7 +1067,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// where they are, so we can be sure to emit subsequent instructions
// after them.
if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
- FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt));
+ FastIS->setLastLocalValue(std::prev(FuncInfo->InsertPt));
else
FastIS->setLastLocalValue(0);
}
@@ -1071,7 +1075,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
unsigned NumFastIselRemaining = std::distance(Begin, End);
// Do FastISel on as many instructions as possible.
for (; BI != Begin; --BI) {
- const Instruction *Inst = llvm::prior(BI);
+ const Instruction *Inst = std::prev(BI);
// If we no longer require this instruction, skip it.
if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
@@ -1092,7 +1096,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Try to fold the load if so.
const Instruction *BeforeInst = Inst;
while (BeforeInst != Begin) {
- BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst));
+ BeforeInst = std::prev(BasicBlock::const_iterator(BeforeInst));
if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
break;
}
@@ -1100,7 +1104,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
BeforeInst->hasOneUse() &&
FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) {
// If we succeeded, don't re-select the load.
- BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
+ BI = std::next(BasicBlock::const_iterator(BeforeInst));
--NumFastIselRemaining;
++NumFastIselSuccess;
}
@@ -2194,8 +2198,7 @@ CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N, const TargetLowering *TLI,
- unsigned ChildNo) {
+ SDValue N, const TargetLowering *TLI, unsigned ChildNo) {
if (ChildNo >= N.getNumOperands())
return false; // Match fails if out of range child #.
return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI);
@@ -2231,6 +2234,14 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo));
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
@@ -2313,6 +2324,14 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
case SelectionDAGISel::OPC_CheckInteger:
Result = !::CheckInteger(Table, Index, N);
return Index;
+ case SelectionDAGISel::OPC_CheckChild0Integer:
+ case SelectionDAGISel::OPC_CheckChild1Integer:
+ case SelectionDAGISel::OPC_CheckChild2Integer:
+ case SelectionDAGISel::OPC_CheckChild3Integer:
+ case SelectionDAGISel::OPC_CheckChild4Integer:
+ Result = !::CheckChildInteger(Table, Index, N,
+ Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Integer);
+ return Index;
case SelectionDAGISel::OPC_CheckAndImm:
Result = !::CheckAndImm(Table, Index, N, SDISel);
return Index;
@@ -2693,6 +2712,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_CheckInteger:
if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break;
continue;
+ case OPC_CheckChild0Integer: case OPC_CheckChild1Integer:
+ case OPC_CheckChild2Integer: case OPC_CheckChild3Integer:
+ case OPC_CheckChild4Integer:
+ if (!::CheckChildInteger(MatcherTable, MatcherIndex, N,
+ Opcode-OPC_CheckChild0Integer)) break;
+ continue;
case OPC_CheckAndImm:
if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break;
continue;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index b752b482e3..1483fddfdd 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -15,12 +15,11 @@
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/DebugInfo.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 82b068d25c..5de0b030c7 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/CommandLine.h"
@@ -74,6 +75,7 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS,
isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
+ isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
Alignment = CS->getParamAlignment(AttrIdx);
}
@@ -1115,6 +1117,54 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
(KnownOne.countPopulation() == 1);
}
+bool TargetLowering::isConstTrueVal(const SDNode *N) const {
+ if (!N)
+ return false;
+
+ bool IsVec = false;
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN) {
+ const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
+ if (!BV)
+ return false;
+
+ IsVec = true;
+ CN = BV->getConstantSplatValue();
+ }
+
+ switch (getBooleanContents(IsVec)) {
+ case UndefinedBooleanContent:
+ return CN->getAPIntValue()[0];
+ case ZeroOrOneBooleanContent:
+ return CN->isOne();
+ case ZeroOrNegativeOneBooleanContent:
+ return CN->isAllOnesValue();
+ }
+
+ llvm_unreachable("Invalid boolean contents");
+}
+
+bool TargetLowering::isConstFalseVal(const SDNode *N) const {
+ if (!N)
+ return false;
+
+ bool IsVec = false;
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN) {
+ const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
+ if (!BV)
+ return false;
+
+ IsVec = true;
+ CN = BV->getConstantSplatValue();
+ }
+
+ if (getBooleanContents(IsVec) == UndefinedBooleanContent)
+ return !CN->getAPIntValue()[0];
+
+ return CN->isNullValue();
+}
+
/// SimplifySetCC - Try to simplify a setcc built with the specified operands
/// and cc. If it is unable to simplify it, return a null SDValue.
SDValue
@@ -1468,18 +1518,32 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Canonicalize GE/LE comparisons to use GT/LT comparisons.
if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
- // X >= C0 --> X > (C0-1)
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(C1-1, N1.getValueType()),
- (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+ // X >= C0 --> X > (C0 - 1)
+ APInt C = C1 - 1;
+ ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
+ if ((DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+ (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
+ isLegalICmpImmediate(C.getSExtValue())))) {
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C, N1.getValueType()),
+ NewCC);
+ }
}
if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
- // X <= C0 --> X < (C0+1)
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(C1+1, N1.getValueType()),
- (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+ // X <= C0 --> X < (C0 + 1)
+ APInt C = C1 + 1;
+ ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
+ if ((DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+ (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
+ isLegalICmpImmediate(C.getSExtValue())))) {
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C, N1.getValueType()),
+ NewCC);
+ }
}
if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
@@ -1535,7 +1599,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOpcode() == ISD::AND)
if (ConstantSDNode *AndRHS =
dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
+ EVT ShiftTy = DCI.isBeforeLegalize() ?
getPointerTy() : getShiftAmountTy(N0.getValueType());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
@@ -1565,7 +1629,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
const APInt &AndRHSC = AndRHS->getAPIntValue();
if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
- EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
+ EVT ShiftTy = DCI.isBeforeLegalize() ?
getPointerTy() : getShiftAmountTy(N0.getValueType());
EVT CmpTy = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
@@ -1593,7 +1657,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
NewC = NewC.lshr(ShiftBits);
if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) {
- EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
+ EVT ShiftTy = DCI.isBeforeLegalize() ?
getPointerTy() : getShiftAmountTy(N0.getValueType());
EVT CmpTy = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
@@ -2663,3 +2727,14 @@ BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType())));
}
}
+
+bool TargetLowering::
+verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
+ if (!isa<ConstantSDNode>(Op.getOperand(0))) {
+ DAG.getContext()->emitError("argument to '__builtin_return_address' must "
+ "be a constant integer");
+ return true;
+ }
+
+ return false;
+}
diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
index f769b44efb..1120be8ed2 100644
--- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -16,7 +16,7 @@
using namespace llvm;
TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
- : TD(TM.getDataLayout()) {
+ : DL(TM.getDataLayout()) {
}
TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {