aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/R600/R600ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/R600/R600ISelLowering.cpp')
-rw-r--r--lib/Target/R600/R600ISelLowering.cpp150
1 files changed, 91 insertions, 59 deletions
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index 6405a82b3a..d6c68305da 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -82,9 +82,31 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
- setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
- setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
+
+ // Expand sign extension of vectors
+ if (!Subtarget->hasBFE())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
+
+ if (!Subtarget->hasBFE())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
+
+ if (!Subtarget->hasBFE())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+
// Legalize loads and stores to the private address space.
setOperationAction(ISD::LOAD, MVT::i32, Custom);
@@ -117,6 +139,11 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ // These should be replaced by UDVIREM, but it does not happen automatically
+ // during Type Legalization
+ setOperationAction(ISD::UDIV, MVT::i64, Custom);
+ setOperationAction(ISD::UREM, MVT::i64, Custom);
+
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setBooleanContents(ZeroOrNegativeOneBooleanContent);
@@ -538,8 +565,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(2, MVT::i32), // SWZ_Z
DAG.getConstant(3, MVT::i32) // SWZ_W
};
- return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
- Args, 8);
+ return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
}
// default for switch(IntrinsicID)
@@ -689,7 +715,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
Op.getOperand(9),
Op.getOperand(10)
};
- return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
+ return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
}
case AMDGPUIntrinsic::AMDGPU_dp4: {
SDValue Args[8] = {
@@ -710,7 +736,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
DAG.getConstant(3, MVT::i32))
};
- return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
+ return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
}
case Intrinsic::r600_read_ngroups_x:
@@ -960,13 +986,6 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
}
-
- // Possible Min/Max pattern
- SDValue MinMax = LowerMinMax(Op, DAG);
- if (MinMax.getNode()) {
- return MinMax;
- }
-
// If we make it this for it means we have no native instructions to handle
// this SELECT_CC, so we must lower it.
SDValue HWTrue, HWFalse;
@@ -1088,10 +1107,10 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(0, MVT::i32),
Mask
};
- SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
+ SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
SDValue Args[3] = { Chain, Input, DWordAddr };
return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
- Op->getVTList(), Args, 3, MemVT,
+ Op->getVTList(), Args, MemVT,
StoreNode->getMemOperand());
} else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
Value.getValueType().bitsGE(MVT::i32)) {
@@ -1131,7 +1150,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if (ValueVT.isVector()) {
unsigned NumElemVT = ValueVT.getVectorNumElements();
EVT ElemVT = ValueVT.getVectorElementType();
- SDValue Stores[4];
+ SmallVector<SDValue, 4> Stores(NumElemVT);
assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
"vector width in load");
@@ -1148,7 +1167,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Chain, Elem, Ptr,
DAG.getTargetConstant(Channel, MVT::i32));
}
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
} else {
if (ValueVT == MVT::i8) {
Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
@@ -1212,10 +1231,11 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
if (Ret.getNode()) {
- SDValue Ops[2];
- Ops[0] = Ret;
- Ops[1] = Chain;
- return DAG.getMergeValues(Ops, 2, DL);
+ SDValue Ops[2] = {
+ Ret,
+ Chain
+ };
+ return DAG.getMergeValues(Ops, DL);
}
@@ -1224,7 +1244,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
SplitVectorLoad(Op, DAG),
Chain
};
- return DAG.getMergeValues(MergedValues, 2, DL);
+ return DAG.getMergeValues(MergedValues, DL);
}
int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
@@ -1232,8 +1252,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
(LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
SDValue Result;
- if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
- isa<Constant>(LoadNode->getSrcValue()) ||
+ if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
+ isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
isa<ConstantSDNode>(Ptr)) {
SDValue Slots[4];
for (unsigned i = 0; i < 4; i++) {
@@ -1252,7 +1272,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
NewVT = VT;
NumElements = VT.getVectorNumElements();
}
- Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
+ Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
+ makeArrayRef(Slots, NumElements));
} else {
// non-constant ptr can't be folded, keeps it as a v4f32 load
Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
@@ -1268,10 +1289,10 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
}
SDValue MergedValues[2] = {
- Result,
- Chain
+ Result,
+ Chain
};
- return DAG.getMergeValues(MergedValues, 2, DL);
+ return DAG.getMergeValues(MergedValues, DL);
}
// For most operations returning SDValue() will result in the node being
@@ -1295,7 +1316,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);
SDValue MergedValues[2] = { Sra, Chain };
- return DAG.getMergeValues(MergedValues, 2, DL);
+ return DAG.getMergeValues(MergedValues, DL);
}
if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
@@ -1332,7 +1353,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
Loads[i] = DAG.getUNDEF(ElemVT);
}
EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
- LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
+ LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
} else {
LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
Chain, Ptr,
@@ -1340,11 +1361,12 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
Op.getOperand(2));
}
- SDValue Ops[2];
- Ops[0] = LoweredLoad;
- Ops[1] = Chain;
+ SDValue Ops[2] = {
+ LoweredLoad,
+ Chain
+ };
- return DAG.getMergeValues(Ops, 2, DL);
+ return DAG.getMergeValues(Ops, DL);
}
/// XXX Only kernel functions are supported, so we can assume for now that
@@ -1365,8 +1387,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
SmallVector<ISD::InputArg, 8> LocalIns;
- getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
- LocalIns);
+ getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
AnalyzeFormalArguments(CCInfo, LocalIns);
@@ -1392,32 +1413,38 @@ SDValue R600TargetLowering::LowerFormalArguments(
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
- SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
+
+ // FIXME: This should really check the extload type, but the handling of
+ // extload vecto parameters seems to be broken.
+ //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ ISD::LoadExtType Ext = ISD::SEXTLOAD;
+ SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
MachinePointerInfo(UndefValue::get(PtrTy)),
MemVT, false, false, 4);
- // 4 is the preferred alignment for
- // the CONSTANT memory space.
+
+ // 4 is the preferred alignment for the CONSTANT memory space.
InVals.push_back(Arg);
}
return Chain;
}
EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
- if (!VT.isVector()) return MVT::i32;
+ if (!VT.isVector())
+ return MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
-static SDValue
-CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
- DenseMap<unsigned, unsigned> &RemapSwizzle) {
+static SDValue CompactSwizzlableVector(
+ SelectionDAG &DAG, SDValue VectorEntry,
+ DenseMap<unsigned, unsigned> &RemapSwizzle) {
assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
assert(RemapSwizzle.empty());
SDValue NewBldVec[4] = {
- VectorEntry.getOperand(0),
- VectorEntry.getOperand(1),
- VectorEntry.getOperand(2),
- VectorEntry.getOperand(3)
+ VectorEntry.getOperand(0),
+ VectorEntry.getOperand(1),
+ VectorEntry.getOperand(2),
+ VectorEntry.getOperand(3)
};
for (unsigned i = 0; i < 4; i++) {
@@ -1448,7 +1475,7 @@ CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
}
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
- VectorEntry.getValueType(), NewBldVec, 4);
+ VectorEntry.getValueType(), NewBldVec);
}
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
@@ -1486,7 +1513,7 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
}
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
- VectorEntry.getValueType(), NewBldVec, 4);
+ VectorEntry.getValueType(), NewBldVec);
}
@@ -1524,6 +1551,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
+ default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
// (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
case ISD::FP_ROUND: {
SDValue Arg = N->getOperand(0);
@@ -1613,8 +1641,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
}
// Return the new vector
- return DAG.getNode(ISD::BUILD_VECTOR, dl,
- VT, &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
// Extract_vec (Build_vector) generated by custom lowering
@@ -1638,6 +1665,11 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::SELECT_CC: {
+ // Try common optimizations
+ SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
+ if (Ret.getNode())
+ return Ret;
+
// fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
// selectcc x, y, a, b, inv(cc)
//
@@ -1697,7 +1729,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
};
SDLoc DL(N);
NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
- return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
+ return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
}
case AMDGPUISD::TEXTURE_FETCH: {
SDValue Arg = N->getOperand(1);
@@ -1727,10 +1759,11 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
};
NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
- NewArgs, 19);
+ NewArgs);
}
}
- return SDValue();
+
+ return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
static bool
@@ -1779,8 +1812,7 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
};
std::vector<unsigned> Consts;
- for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
- int OtherSrcIdx = SrcIndices[i];
+ for (int OtherSrcIdx : SrcIndices) {
int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
if (OtherSrcIdx < 0 || OtherSelIdx < 0)
continue;
@@ -1791,14 +1823,14 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
if (RegisterSDNode *Reg =
dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
if (Reg->getReg() == AMDGPU::ALU_CONST) {
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
- ParentNode->getOperand(OtherSelIdx));
+ ConstantSDNode *Cst
+ = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Consts.push_back(Cst->getZExtValue());
}
}
}
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+ ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Consts.push_back(Cst->getZExtValue());
if (!TII->fitsConstReadLimitations(Consts)) {
return false;