aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp890
1 files changed, 541 insertions, 349 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 4ab92add25..1c3b9ae790 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -605,10 +605,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
// We don't support sin/cos/fmod
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Expand FP immediates into loads from the stack, except for the special
// cases we handle.
@@ -633,8 +635,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Special cases we handle for FP constants.
addLegalFPImmediate(APFloat(+0.0f)); // xorps
@@ -644,8 +647,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f64 , Expand);
- setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
}
} else if (!TM.Options.UseSoftFloat) {
// f32 and f64 in x87.
@@ -659,10 +663,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f32 , Expand);
- setOperationAction(ISD::FSIN , MVT::f64 , Expand);
- setOperationAction(ISD::FCOS , MVT::f32 , Expand);
- setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
@@ -699,8 +705,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f80 , Expand);
- setOperationAction(ISD::FCOS , MVT::f80 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f80, Expand);
+ setOperationAction(ISD::FCOS , MVT::f80, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
}
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
@@ -748,7 +755,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FMA, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
@@ -1047,6 +1056,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v4i32, Custom);
}
+ setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
}
if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
@@ -1111,6 +1122,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v16i16, Custom);
setOperationAction(ISD::SRA, MVT::v32i8, Custom);
+ setOperationAction(ISD::SDIV, MVT::v16i16, Custom);
+
setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
@@ -1166,6 +1179,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SHL, MVT::v8i32, Legal);
setOperationAction(ISD::SRA, MVT::v8i32, Legal);
+
+ setOperationAction(ISD::SDIV, MVT::v8i32, Custom);
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
@@ -1275,6 +1290,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setLibcallName(RTLIB::SRA_I128, 0);
}
+ // Combine sin / cos into one node or libcall if possible.
+ if (Subtarget->hasSinCos()) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ if (Subtarget->isTargetDarwin()) {
+ // For MacOSX, we don't want to the normal expansion of a libcall to
+ // sincos. We want to issue a libcall to __sincos_stret to avoid memory
+ // traffic.
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ }
+ }
+
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
@@ -1295,6 +1323,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
@@ -1306,17 +1335,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// On Darwin, -Os means optimize for size without hurting performance,
// do not reduce the limit.
- maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
- maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
- maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
- maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
- maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
- maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
+ MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
+ MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
+ MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
+ MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
setPrefLoopAlignment(4); // 2^4 bytes.
- benefitFromCodePlacementOpt = true;
+ BenefitFromCodePlacementOpt = true;
// Predictable cmov don't hurt on atom because it's in-order.
- predictableSelectIsExpensive = !Subtarget->isAtom();
+ PredictableSelectIsExpensive = !Subtarget->isAtom();
setPrefFunctionAlignment(4); // 2^4 bytes.
}
@@ -1562,14 +1591,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
- // Add the regs to the liveout set for the function.
- MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg()))
- MRI.addLiveOut(RVLocs[i].getLocReg());
-
SDValue Flag;
-
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
@@ -1638,12 +1660,13 @@ X86TargetLowering::LowerReturn(SDValue Chain,
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- // The x86-64 ABI for returning structs by value requires that we copy
- // the sret argument into %rax for the return. We saved the argument into
- // a virtual register in the entry block, so now we copy the value out
- // and into %rax.
+ // The x86-64 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+ // We saved the argument into a virtual register in the entry block,
+ // so now we copy the value out and into %rax/%eax.
if (Subtarget->is64Bit() &&
DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
MachineFunction &MF = DAG.getMachineFunction();
@@ -1653,11 +1676,12 @@ X86TargetLowering::LowerReturn(SDValue Chain,
"SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
- Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
+ unsigned RetValReg = Subtarget->isTarget64BitILP32() ? X86::EAX : X86::RAX;
+ Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
- // RAX now acts like a return value.
- MRI.addLiveOut(X86::RAX);
+ // RAX/EAX now acts like a return value.
+ RetOps.push_back(DAG.getRegister(RetValReg, MVT::i64));
}
RetOps[0] = Chain; // Update chain.
@@ -2009,14 +2033,16 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
InVals.push_back(ArgValue);
}
- // The x86-64 ABI for returning structs by value requires that we copy
- // the sret argument into %rax for the return. Save the argument into
- // a virtual register so that we can access it from the return points.
+ // The x86-64 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+ // Save the argument into a virtual register so that we can access it
+ // from the return points.
if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
- Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ MVT PtrTy = getPointerTy();
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
@@ -2630,8 +2656,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// This isn't right, although it's probably harmless on x86; liveouts
// should be computed from returns not tail calls. Consider a void
// function making a tail call to a function returning int.
- return DAG.getNode(X86ISD::TC_RETURN, dl,
- NodeTys, &Ops[0], Ops.size());
+ return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
}
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
@@ -2789,7 +2814,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const {
+ SelectionDAG &DAG) const {
if (!IsTailCallConvention(CalleeCC) &&
CalleeCC != CallingConv::C)
return false;
@@ -2828,7 +2853,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// An stdcall caller is expected to clean up its arguments; the callee
// isn't going to do that.
- if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+ if (!CCMatch && CallerCC == CallingConv::X86_StdCall)
return false;
// Do not sibcall optimize vararg calls unless all arguments are passed via
@@ -2948,9 +2973,15 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// callee-saved registers are restored. These happen to be the same
// registers used to pass 'inreg' arguments so watch out for those.
if (!Subtarget->is64Bit() &&
- !isa<GlobalAddressSDNode>(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee)) {
+ ((!isa<GlobalAddressSDNode>(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) ||
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)) {
unsigned NumInRegs = 0;
+ // In PIC we need an extra register to formulate the address computation
+ // for the callee.
+ unsigned MaxInRegs =
+ (getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc())
@@ -2959,7 +2990,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
switch (Reg) {
default: break;
case X86::EAX: case X86::EDX: case X86::ECX:
- if (++NumInRegs == 3)
+ if (++NumInRegs == MaxInRegs)
return false;
break;
}
@@ -2995,7 +3026,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::SHUFP:
- case X86ISD::PALIGN:
+ case X86ISD::PALIGNR:
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
case X86ISD::MOVHLPS:
@@ -3045,7 +3076,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
- case X86ISD::PALIGN:
+ case X86ISD::PALIGNR:
case X86ISD::SHUFP:
case X86ISD::VPERM2X128:
return DAG.getNode(Opc, dl, VT, V1, V2,
@@ -3355,8 +3386,8 @@ static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
/// is suitable for input to PALIGNR.
static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT,
const X86Subtarget *Subtarget) {
- if ((VT.getSizeInBits() == 128 && !Subtarget->hasSSSE3()) ||
- (VT.getSizeInBits() == 256 && !Subtarget->hasInt256()))
+ if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
+ (VT.is256BitVector() && !Subtarget->hasInt256()))
return false;
unsigned NumElts = VT.getVectorNumElements();
@@ -3445,7 +3476,7 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
/// reverse of what x86 shuffles want.
static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
bool Commuted = false) {
- if (!HasFp256 && VT.getSizeInBits() == 256)
+ if (!HasFp256 && VT.is256BitVector())
return false;
unsigned NumElems = VT.getVectorNumElements();
@@ -3580,7 +3611,7 @@ static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
static
SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
DebugLoc dl = SVOp->getDebugLoc();
if (VT != MVT::v8i32 && VT != MVT::v8f32)
@@ -3630,7 +3661,7 @@ static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
(!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
@@ -3669,7 +3700,7 @@ static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
(!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
@@ -3700,14 +3731,14 @@ static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
-static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT,
- bool HasInt256) {
+static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
unsigned NumElts = VT.getVectorNumElements();
+ bool Is256BitVec = VT.is256BitVector();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ if (Is256BitVec && NumElts != 4 && NumElts != 8 &&
(!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
@@ -3715,7 +3746,7 @@ static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT,
// FIXME: Need a better way to get rid of this, there's no latency difference
// between UNPCKLPD and MOVDDUP, the later should always be checked first and
// the former later. We should also remove the "_undef" special mask.
- if (NumElts == 4 && VT.getSizeInBits() == 256)
+ if (NumElts == 4 && Is256BitVec)
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3749,7 +3780,7 @@ static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
(!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
@@ -3831,7 +3862,7 @@ static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions.
static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned HalfSize = VT.getVectorNumElements()/2;
@@ -3865,7 +3896,7 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
unsigned NumElts = VT.getVectorNumElements();
// Only match 256-bit with 32/64-bit types
- if (VT.getSizeInBits() != 256 || (NumElts != 4 && NumElts != 8))
+ if (!VT.is256BitVector() || (NumElts != 4 && NumElts != 8))
return false;
unsigned NumLanes = VT.getSizeInBits()/128;
@@ -3921,8 +3952,8 @@ static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT,
unsigned NumElems = VT.getVectorNumElements();
- if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
- (VT.getSizeInBits() == 256 && NumElems != 8))
+ if ((VT.is128BitVector() && NumElems != 4) ||
+ (VT.is256BitVector() && NumElems != 8))
return false;
// "i+1" is the value the indexed mask element must have
@@ -3944,8 +3975,8 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
unsigned NumElems = VT.getVectorNumElements();
- if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
- (VT.getSizeInBits() == 256 && NumElems != 8))
+ if ((VT.is128BitVector() && NumElems != 4) ||
+ (VT.is256BitVector() && NumElems != 8))
return false;
// "i" is the value the indexed mask element must have
@@ -4005,9 +4036,8 @@ bool X86::isVEXTRACTF128Index(SDNode *N) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- unsigned VL = N->getValueType(0).getVectorNumElements();
- unsigned VBits = N->getValueType(0).getSizeInBits();
- unsigned ElSize = VBits / VL;
+ MVT VT = N->getValueType(0).getSimpleVT();
+ unsigned ElSize = VT.getVectorElementType().getSizeInBits();
bool Result = (Index * ElSize) % 128 == 0;
return Result;
@@ -4024,9 +4054,8 @@ bool X86::isVINSERTF128Index(SDNode *N) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
- unsigned VL = N->getValueType(0).getVectorNumElements();
- unsigned VBits = N->getValueType(0).getSizeInBits();
- unsigned ElSize = VBits / VL;
+ MVT VT = N->getValueType(0).getSimpleVT();
+ unsigned ElSize = VT.getVectorElementType().getSizeInBits();
bool Result = (Index * ElSize) % 128 == 0;
return Result;
@@ -4036,7 +4065,7 @@ bool X86::isVINSERTF128Index(SDNode *N) {
/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
/// Handles 128-bit and 256-bit.
static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+ MVT VT = N->getValueType(0).getSimpleVT();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for PSHUF/SHUFP");
@@ -4066,7 +4095,7 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+ MVT VT = N->getValueType(0).getSimpleVT();
assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
"Unsupported vector type for PSHUFHW");
@@ -4090,7 +4119,7 @@ static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+ MVT VT = N->getValueType(0).getSimpleVT();
assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
"Unsupported vector type for PSHUFHW");
@@ -4114,7 +4143,7 @@ static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3;
unsigned NumElts = VT.getVectorNumElements();
@@ -4145,8 +4174,8 @@ unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- EVT VecVT = N->getOperand(0).getValueType();
- EVT ElVT = VecVT.getVectorElementType();
+ MVT VecVT = N->getOperand(0).getValueType().getSimpleVT();
+ MVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
return Index / NumElemsPerChunk;
@@ -4162,8 +4191,8 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
- EVT VecVT = N->getValueType(0);
- EVT ElVT = VecVT.getVectorElementType();
+ MVT VecVT = N->getValueType(0).getSimpleVT();
+ MVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
return Index / NumElemsPerChunk;
@@ -4173,7 +4202,7 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions.
/// Handles 256-bit.
static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+ MVT VT = N->getValueType(0).getSimpleVT();
unsigned NumElts = VT.getVectorNumElements();
@@ -4193,17 +4222,18 @@ static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) {
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
- return ((isa<ConstantSDNode>(Elt) &&
- cast<ConstantSDNode>(Elt)->isNullValue()) ||
- (isa<ConstantFPSDNode>(Elt) &&
- cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Elt))
+ return CN->isNullValue();
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
+ return CFP->getValueAPF().isPosZero();
+ return false;
}
/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
/// their permute mask.
static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> MaskVec;
@@ -4352,12 +4382,11 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- unsigned Size = VT.getSizeInBits();
// Always build SSE zero vectors as <4 x i32> bitcasted
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
- if (Size == 128) { // SSE
+ if (VT.is128BitVector()) { // SSE
if (Subtarget->hasSSE2()) { // SSE2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
@@ -4365,7 +4394,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
}
- } else if (Size == 256) { // AVX
+ } else if (VT.is256BitVector()) { // AVX
if (Subtarget->hasInt256()) { // AVX2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
@@ -4387,14 +4416,13 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
/// no AVX2 supprt, use two <4 x i32> inserted in a <8 x i32> appropriately.
/// Then bitcast to their original type, ensuring they get CSE'd.
-static SDValue getOnesVector(EVT VT, bool HasInt256, SelectionDAG &DAG,
+static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- unsigned Size = VT.getSizeInBits();
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
SDValue Vec;
- if (Size == 256) {
+ if (VT.is256BitVector()) {
if (HasInt256) { // AVX2
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
@@ -4402,7 +4430,7 @@ static SDValue getOnesVector(EVT VT, bool HasInt256, SelectionDAG &DAG,
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
}
- } else if (Size == 128) {
+ } else if (VT.is128BitVector()) {
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
} else
llvm_unreachable("Unexpected vector type");
@@ -4481,14 +4509,13 @@ static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
EVT VT = V.getValueType();
DebugLoc dl = V.getDebugLoc();
- unsigned Size = VT.getSizeInBits();
- if (Size == 128) {
+ if (VT.is128BitVector()) {
V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
&SplatMask[0]);
- } else if (Size == 256) {
+ } else if (VT.is256BitVector()) {
// To use VPERMILPS to splat scalars, the second half of indicies must
// refer to the higher part, which is a duplication of the lower one,
// because VPERMILPS can only handle in-lane permutations.
@@ -4512,14 +4539,14 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
int EltNo = SV->getSplatIndex();
int NumElems = SrcVT.getVectorNumElements();
- unsigned Size = SrcVT.getSizeInBits();
+ bool Is256BitVec = SrcVT.is256BitVector();
- assert(((Size == 128 && NumElems > 4) || Size == 256) &&
- "Unknown how to promote splat for type");
+ assert(((SrcVT.is128BitVector() && NumElems > 4) || Is256BitVec) &&
+ "Unknown how to promote splat for type");
// Extract the 128-bit part containing the splat element and update
// the splat element index when it refers to the higher register.
- if (Size == 256) {
+ if (Is256BitVec) {
V1 = Extract128BitVector(V1, EltNo, DAG, dl);
if (EltNo >= NumElems/2)
EltNo -= NumElems/2;
@@ -4536,7 +4563,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
// Recreate the 256-bit vector and place the same 128-bit vector
// into the low and high part. This is necessary because we want
// to use VPERM* to shuffle the vectors
- if (Size == 256) {
+ if (Is256BitVec) {
V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1);
}
@@ -4588,6 +4615,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
case X86ISD::MOVLHPS:
DecodeMOVLHPSMask(NumElems, Mask);
break;
+ case X86ISD::PALIGNR:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
case X86ISD::PSHUFD:
case X86ISD::VPERMILP:
ImmN = N->getOperand(N->getNumOperands()-1);
@@ -4631,7 +4662,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
case X86ISD::MOVLPS:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
- case X86ISD::PALIGN:
// Not yet implemented
return false;
default: llvm_unreachable("unknown target shuffle node");
@@ -5099,7 +5129,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget->hasFp256())
return SDValue();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
@@ -5297,8 +5327,8 @@ SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT ExtVT = VT.getVectorElementType();
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT ExtVT = VT.getVectorElementType();
unsigned NumElems = Op.getNumOperands();
// Vectors containing all zeros can be matched by pxor and xorps later
@@ -5314,7 +5344,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
// vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
// vpcmpeqd on 256-bit vectors.
- if (ISD::isBuildVectorAllOnes(Op.getNode())) {
+ if (Subtarget->hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256()))
return Op;
@@ -5629,7 +5659,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// to create 256-bit vectors from two other 128-bit ones.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
- EVT ResVT = Op.getValueType();
+ MVT ResVT = Op.getValueType().getSimpleVT();
assert(ResVT.is256BitVector() && "Value type must be 256-bit wide");
@@ -5655,8 +5685,8 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
- EVT VT = SVOp->getValueType(0);
- EVT EltVT = VT.getVectorElementType();
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
@@ -5667,41 +5697,40 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
// Check the mask for BLEND and build the value.
unsigned MaskValue = 0;
// There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
- unsigned NumLanes = (NumElems-1)/8 + 1;
+ unsigned NumLanes = (NumElems-1)/8 + 1;
unsigned NumElemsInLane = NumElems / NumLanes;
// Blend for v16i16 should be symetric for the both lanes.
for (unsigned i = 0; i < NumElemsInLane; ++i) {
- int SndLaneEltIdx = (NumLanes == 2) ?
+ int SndLaneEltIdx = (NumLanes == 2) ?
SVOp->getMaskElt(i + NumElemsInLane) : -1;
int EltIdx = SVOp->getMaskElt(i);
- if ((EltIdx == -1 || EltIdx == (int)i) &&
- (SndLaneEltIdx == -1 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
+ if ((EltIdx < 0 || EltIdx == (int)i) &&
+ (SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
continue;
- if (((unsigned)EltIdx == (i + NumElems)) &&
- (SndLaneEltIdx == -1 ||
+ if (((unsigned)EltIdx == (i + NumElems)) &&
+ (SndLaneEltIdx < 0 ||
(unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
MaskValue |= (1<<i);
- else
+ else
return SDValue();
}
// Convert i32 vectors to floating point if it is not AVX2.
// AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
- EVT BlendVT = VT;
+ MVT BlendVT = VT;
if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
- BlendVT = EVT::getVectorVT(*DAG.getContext(),
- EVT::getFloatingPointVT(EltVT.getSizeInBits()),
- NumElems);
+ BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
+ NumElems);
V1 = DAG.getNode(ISD::BITCAST, dl, VT, V1);
V2 = DAG.getNode(ISD::BITCAST, dl, VT, V2);
}
-
- SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
- DAG.getConstant(MaskValue, MVT::i32));
+
+ SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
+ DAG.getConstant(MaskValue, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
}
@@ -5836,6 +5865,11 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
}
}
+ // Promote splats to a larger type which usually leads to more efficient code.
+ // FIXME: Is this true if pshufb is available?
+ if (SVOp->isSplat())
+ return PromoteSplat(SVOp, DAG);
+
// If we have SSSE3, and all words of the result are from 1 input vector,
// case 2 is generated, otherwise case 3 is generated. If no SSSE3
// is present, fall back to case 4.
@@ -5851,7 +5885,7 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
int EltIdx = MaskVals[i] * 2;
int Idx0 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx;
int Idx1 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx+1;
- pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8));
pshufbMask.push_back(DAG.getConstant(Idx1, MVT::i8));
}
V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1);
@@ -5969,6 +6003,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
DebugLoc dl = SVOp->getDebugLoc();
ArrayRef<int> MaskVals = SVOp->getMask();
+ // Promote splats to a larger type which usually leads to more efficient code.
+ // FIXME: Is this true if pshufb is available?
+ if (SVOp->isSplat())
+ return PromoteSplat(SVOp, DAG);
+
// If we have SSSE3, case 1 is generated when all result bytes come from
// one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
// present, fall back to case 3.
@@ -6087,7 +6126,7 @@ static
SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
@@ -6134,8 +6173,9 @@ SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>
static
SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG, DebugLoc dl) {
+ SelectionDAG &DAG) {
MVT VT = SVOp->getValueType(0).getSimpleVT();
+ DebugLoc dl = SVOp->getDebugLoc();
unsigned NumElems = VT.getVectorNumElements();
MVT NewVT;
unsigned Scale;
@@ -6171,7 +6211,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
/// getVZextMovL - Return a zero-extending vector move low node.
///
-static SDValue getVZextMovL(EVT VT, EVT OpVT,
+static SDValue getVZextMovL(MVT VT, EVT OpVT,
SDValue SrcOp, SelectionDAG &DAG,
const X86Subtarget *Subtarget, DebugLoc dl) {
if (VT == MVT::v2f64 || VT == MVT::v4f32) {
@@ -6213,14 +6253,14 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
if (NewOp.getNode())
return NewOp;
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned NumElems = VT.getVectorNumElements();
unsigned NumLaneElems = NumElems / 2;
DebugLoc dl = SVOp->getDebugLoc();
- MVT EltVT = VT.getVectorElementType().getSimpleVT();
- EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
+ MVT EltVT = VT.getVectorElementType();
+ MVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
SDValue Output[2];
SmallVector<int, 16> Mask;
@@ -6325,7 +6365,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
assert(VT.is128BitVector() && "Unsupported vector size");
@@ -6579,7 +6619,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
// Reduce a vector shuffle to zext.
SDValue
-X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
+X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
// PMOVZX is only available from SSE41.
if (!Subtarget->hasSSE41())
return SDValue();
@@ -6623,9 +6663,10 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+ LLVMContext *Context = DAG.getContext();
unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
- EVT NeVT = EVT::getIntegerVT(*DAG.getContext(), NBits);
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), NeVT, NumElems >> Shift);
+ EVT NeVT = EVT::getIntegerVT(*Context, NBits);
+ EVT NVT = EVT::getVectorVT(*Context, NeVT, NumElems >> Shift);
if (!isTypeLegal(NVT))
return SDValue();
@@ -6644,8 +6685,21 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
// If it's foldable, i.e. normal load with single use, we will let code
// selection to fold it. Otherwise, we will short the conversion sequence.
if (CIdx && CIdx->getZExtValue() == 0 &&
- (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse()))
+ (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) {
+ if (V.getValueSizeInBits() > V1.getValueSizeInBits()) {
+ // The "ext_vec_elt" node is wider than the result node.
+ // In this case we should extract subvector from V.
+ // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)).
+ unsigned Ratio = V.getValueSizeInBits() / V1.getValueSizeInBits();
+ EVT FullVT = V.getValueType();
+ EVT SubVecVT = EVT::getVectorVT(*Context,
+ FullVT.getVectorElementType(),
+ FullVT.getVectorNumElements()/Ratio);
+ V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V,
+ DAG.getIntPtrConstant(0));
+ }
V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
+ }
}
return DAG.getNode(ISD::BITCAST, DL, VT,
@@ -6655,7 +6709,7 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -6665,25 +6719,14 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// Handle splat operations
if (SVOp->isSplat()) {
- unsigned NumElem = VT.getVectorNumElements();
- int Size = VT.getSizeInBits();
-
// Use vbroadcast whenever the splat comes from a foldable load
SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
if (Broadcast.getNode())
return Broadcast;
-
- // Handle splats by matching through known shuffle masks
- if ((Size == 128 && NumElem <= 4) ||
- (Size == 256 && NumElem <= 8))
- return SDValue();
-
- // All remaning splats are promoted to target supported vector shuffles.
- return PromoteSplat(SVOp, DAG);
}
// Check integer expanding shuffles.
- SDValue NewOp = lowerVectorIntExtend(Op, DAG);
+ SDValue NewOp = LowerVectorIntExtend(Op, DAG);
if (NewOp.getNode())
return NewOp;
@@ -6691,7 +6734,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
VT == MVT::v16i16 || VT == MVT::v32i8) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
} else if ((VT == MVT::v4i32 ||
@@ -6699,18 +6742,18 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode()) {
- EVT NewVT = NewOp.getValueType();
+ MVT NewVT = NewOp.getValueType().getSimpleVT();
if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
NewVT, true, false))
return getVZextMovL(VT, NewVT, NewOp.getOperand(0),
DAG, Subtarget, dl);
}
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode()) {
- EVT NewVT = NewOp.getValueType();
+ MVT NewVT = NewOp.getValueType().getSimpleVT();
if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
return getVZextMovL(VT, NewVT, NewOp.getOperand(1),
DAG, Subtarget, dl);
@@ -6725,7 +6768,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
unsigned NumElems = VT.getVectorNumElements();
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
@@ -6816,7 +6859,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
- EVT EltVT = VT.getVectorElementType();
+ MVT EltVT = VT.getVectorElementType();
ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
@@ -6855,7 +6898,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (isShift) {
// No better options. Use a vshldq / vsrldq.
- EVT EltVT = VT.getVectorElementType();
+ MVT EltVT = VT.getVectorElementType();
ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
@@ -6926,7 +6969,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// nodes, and remove one by one until they don't return Op anymore.
if (isPALIGNRMask(M, VT, Subtarget))
- return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
+ return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2,
getShufflePALIGNRImmediate(SVOp),
DAG);
@@ -7035,13 +7078,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue
-X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
+static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
- if (!Op.getOperand(0).getValueType().is128BitVector())
+ if (!Op.getOperand(0).getValueType().getSimpleVT().is128BitVector())
return SDValue();
if (VT.getSizeInBits() == 8) {
@@ -7106,7 +7147,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return SDValue();
SDValue Vec = Op.getOperand(0);
- EVT VecVT = Vec.getValueType();
+ MVT VecVT = Vec.getValueType().getSimpleVT();
// If this is a 256-bit vector result, first extract the 128-bit vector and
// then extract the element from the 128-bit vector.
@@ -7133,7 +7174,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return Res;
}
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
// TODO: handle v16i8.
if (VT.getSizeInBits() == 16) {
@@ -7146,7 +7187,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
MVT::v4i32, Vec),
Op.getOperand(1)));
// Transform it so it match pextrw which produces a 32-bit result.
- EVT EltVT = MVT::i32;
+ MVT EltVT = MVT::i32;
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract,
@@ -7161,7 +7202,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// SHUFPS the element to the lowest double word, then movss.
int Mask[4] = { static_cast<int>(Idx), -1, -1, -1 };
- EVT VVT = Op.getOperand(0).getValueType();
+ MVT VVT = Op.getOperand(0).getValueType().getSimpleVT();
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
@@ -7180,7 +7221,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
int Mask[2] = { 1, -1 };
- EVT VVT = Op.getOperand(0).getValueType();
+ MVT VVT = Op.getOperand(0).getValueType().getSimpleVT();
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
@@ -7190,11 +7231,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return SDValue();
}
-SDValue
-X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- EVT EltVT = VT.getVectorElementType();
+static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
SDValue N0 = Op.getOperand(0);
@@ -7247,8 +7286,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- EVT EltVT = VT.getVectorElementType();
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
SDValue N0 = Op.getOperand(0);
@@ -7296,7 +7335,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
- EVT OpVT = Op.getValueType();
+ MVT OpVT = Op.getValueType().getSimpleVT();
// If this is a 256-bit vector result, first insert into a 128-bit
// vector and then insert into the 256-bit vector.
@@ -7511,8 +7550,7 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
- int64_t Offset,
- SelectionDAG &DAG) const {
+ int64_t Offset, SelectionDAG &DAG) const {
// Create the TargetGlobalAddress node, folding in the constant
// offset if it is legal.
unsigned char OpFlags =
@@ -7732,7 +7770,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
Subtarget->is64Bit(),
- getTargetMachine().getRelocationModel() == Reloc::PIC_);
+ getTargetMachine().getRelocationModel() == Reloc::PIC_);
}
llvm_unreachable("Unknown TLS model.");
}
@@ -8015,9 +8053,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
SmallVector<Constant*,2> CV1;
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 0x4330000000000000ULL))));
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
@@ -8111,7 +8151,8 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
SVT == MVT::v8i8 || SVT == MVT::v8i16) &&
"Custom UINT_TO_FP is not supported!");
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, SVT.getVectorNumElements());
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
+ SVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
}
@@ -8204,8 +8245,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
}
-std::pair<SDValue,SDValue> X86TargetLowering::
-FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) const {
+std::pair<SDValue,SDValue>
+X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned, bool IsReplace) const {
DebugLoc DL = Op.getDebugLoc();
EVT DstTy = Op.getValueType();
@@ -8299,9 +8341,9 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co
static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
- EVT VT = Op->getValueType(0);
+ MVT VT = Op->getValueType(0).getSimpleVT();
SDValue In = Op->getOperand(0);
- EVT InVT = In.getValueType();
+ MVT InVT = In.getValueType().getSimpleVT();
DebugLoc dl = Op->getDebugLoc();
// Optimize vectors in AVX mode:
@@ -8330,7 +8372,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
- EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ MVT HVT = MVT::getVectorVT(VT.getVectorElementType(),
VT.getVectorNumElements()/2);
OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
@@ -8352,9 +8394,9 @@ SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op,
SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
SDValue In = Op.getOperand(0);
- EVT SVT = In.getValueType();
+ MVT SVT = In.getValueType().getSimpleVT();
if (Subtarget->hasFp256()) {
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
@@ -8382,11 +8424,11 @@ SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op,
return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi);
}
-SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
SDValue In = Op.getOperand(0);
- EVT SVT = In.getValueType();
+ MVT SVT = In.getValueType().getSimpleVT();
if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
@@ -8501,9 +8543,10 @@ SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
- if (Op.getValueType().isVector()) {
- if (Op.getValueType() == MVT::v8i16)
- return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), Op.getValueType(),
+ MVT VT = Op.getValueType().getSimpleVT();
+ if (VT.isVector()) {
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), VT,
DAG.getNode(ISD::FP_TO_SINT, Op.getDebugLoc(),
MVT::v8i32, Op.getOperand(0)));
return SDValue();
@@ -8542,12 +8585,11 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
return FIST;
}
-SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
+static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
SDValue In = Op.getOperand(0);
- EVT SVT = In.getValueType();
+ MVT SVT = In.getValueType().getSimpleVT();
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
@@ -8559,8 +8601,8 @@ SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op,
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT EltVT = VT;
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
if (VT.isVector()) {
EltVT = VT.getVectorElementType();
@@ -8568,9 +8610,11 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
}
Constant *C;
if (EltVT == MVT::f64)
- C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, ~(1ULL << 63))));
else
- C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
+ APInt(32, ~(1U << 31))));
C = ConstantVector::getSplat(NumElts, C);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
@@ -8591,8 +8635,8 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT EltVT = VT;
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
if (VT.isVector()) {
EltVT = VT.getVectorElementType();
@@ -8600,9 +8644,11 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
}
Constant *C;
if (EltVT == MVT::f64)
- C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 1ULL << 63)));
else
- C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
+ APInt(32, 1U << 31)));
C = ConstantVector::getSplat(NumElts, C);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
@@ -8626,8 +8672,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT SrcVT = Op1.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT SrcVT = Op1.getValueType().getSimpleVT();
// If second operand is smaller, extend it first.
if (SrcVT.bitsLT(VT)) {
@@ -8646,13 +8692,15 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// First get the sign bit of second operand.
SmallVector<Constant*,4> CV;
if (SrcVT == MVT::f64) {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
+ const fltSemantics &Sem = APFloat::IEEEdouble;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 1ULL << 63))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
} else {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ const fltSemantics &Sem = APFloat::IEEEsingle;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 1U << 31))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -8675,13 +8723,17 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// Clear first operand sign bit.
CV.clear();
if (VT == MVT::f64) {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
+ const fltSemantics &Sem = APFloat::IEEEdouble;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
+ APInt(64, ~(1ULL << 63)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
} else {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ const fltSemantics &Sem = APFloat::IEEEsingle;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
+ APInt(32, ~(1U << 31)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -8697,7 +8749,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue N0 = Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
// Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1).
SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0,
@@ -8707,7 +8759,8 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
// LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able.
//
-SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op,
+ SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
if (!Subtarget->hasSSE41())
@@ -9139,65 +9192,10 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
return SDValue();
}
-SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
-
- if (Op.getValueType().isVector()) return LowerVSETCC(Op, DAG);
-
- assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-
- // Optimize to BT if possible.
- // Lower (X & (1 << N)) == 0 to BT(X, N).
- // Lower ((X >>u N) & 1) != 0 to BT(X, N).
- // Lower ((X >>s N) & 1) != 0 to BT(X, N).
- if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
- Op1.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Op1)->isNullValue() &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
- SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
- if (NewSetCC.getNode())
- return NewSetCC;
- }
-
- // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
- // these.
- if (Op1.getOpcode() == ISD::Constant &&
- (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
- cast<ConstantSDNode>(Op1)->isNullValue()) &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
-
- // If the input is a setcc, then reuse the input setcc or use a new one with
- // the inverted condition.
- if (Op0.getOpcode() == X86ISD::SETCC) {
- X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
- bool Invert = (CC == ISD::SETNE) ^
- cast<ConstantSDNode>(Op1)->isNullValue();
- if (!Invert) return Op0;
-
- CCode = X86::GetOppositeBranchCondition(CCode);
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
- }
- }
-
- bool isFP = Op1.getValueType().isFloatingPoint();
- unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
- if (X86CC == X86::COND_INVALID)
- return SDValue();
-
- SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
- EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), EFLAGS);
-}
-
// Lower256IntVSETCC - Break a VSETCC 256-bit integer VSETCC into two new 128
// ones, and then concatenate the result back.
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
"Unsupported value type for operation");
@@ -9217,26 +9215,27 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl);
// Issue the operation on the smaller types and concatenate the result back
- MVT EltVT = VT.getVectorElementType().getSimpleVT();
- EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+ MVT EltVT = VT.getVectorElementType();
+ MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC),
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
}
-SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDValue Cond;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
+ bool isFP = Op.getOperand(1).getValueType().getSimpleVT().isFloatingPoint();
DebugLoc dl = Op.getDebugLoc();
if (isFP) {
#ifndef NDEBUG
- EVT EltVT = Op0.getValueType().getVectorElementType();
+ MVT EltVT = Op0.getValueType().getVectorElementType().getSimpleVT();
assert(EltVT == MVT::f32 || EltVT == MVT::f64);
#endif
@@ -9377,6 +9376,63 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
return Result;
}
+SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+
+ MVT VT = Op.getValueType().getSimpleVT();
+
+ if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
+
+ assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // Optimize to BT if possible.
+ // Lower (X & (1 << N)) == 0 to BT(X, N).
+ // Lower ((X >>u N) & 1) != 0 to BT(X, N).
+ // Lower ((X >>s N) & 1) != 0 to BT(X, N).
+ if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
+ Op1.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op1)->isNullValue() &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
+ if (NewSetCC.getNode())
+ return NewSetCC;
+ }
+
+ // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
+ // these.
+ if (Op1.getOpcode() == ISD::Constant &&
+ (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+
+ // If the input is a setcc, then reuse the input setcc or use a new one with
+ // the inverted condition.
+ if (Op0.getOpcode() == X86ISD::SETCC) {
+ X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ bool Invert = (CC == ISD::SETNE) ^
+ cast<ConstantSDNode>(Op1)->isNullValue();
+ if (!Invert) return Op0;
+
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ }
+ }
+
+ bool isFP = Op1.getValueType().getSimpleVT().isFloatingPoint();
+ unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
+ if (X86CC == X86::COND_INVALID)
+ return SDValue();
+
+ SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
+ EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), EFLAGS);
+}
+
// isX86LogicalCmp - Return true if opcode is a X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getNode()->getOpcode();
@@ -9499,7 +9555,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
@@ -9610,9 +9666,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op,
SelectionDAG &DAG) const {
- EVT VT = Op->getValueType(0);
+ MVT VT = Op->getValueType(0).getSimpleVT();
SDValue In = Op->getOperand(0);
- EVT InVT = In.getValueType();
+ MVT InVT = In.getValueType().getSimpleVT();
DebugLoc dl = Op->getDebugLoc();
if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
@@ -9646,7 +9702,7 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op,
SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]);
- EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ MVT HalfVT = MVT::getVectorVT(VT.getScalarType(),
VT.getVectorNumElements()/2);
OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
@@ -10155,7 +10211,7 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
-// getTargetVShiftNOde - Handle vector element shifts where the shift amount
+// getTargetVShiftNode - Handle vector element shifts where the shift amount
// may or may not be a constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
SDValue SrcOp, SDValue ShAmt,
@@ -11377,13 +11433,55 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
}
+SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT EltTy = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Lower sdiv X, pow2-const.
+ BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(Op.getOperand(1));
+ if (!C)
+ return SDValue();
+
+ APInt SplatValue, SplatUndef;
+ unsigned MinSplatBits;
+ bool HasAnyUndefs;
+ if (!C->isConstantSplat(SplatValue, SplatUndef, MinSplatBits, HasAnyUndefs))
+ return SDValue();
+
+ if ((SplatValue != 0) &&
+ (SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) {
+ unsigned lg2 = SplatValue.countTrailingZeros();
+ // Splat the sign bit.
+ SDValue Sz = DAG.getConstant(EltTy.getSizeInBits()-1, MVT::i32);
+ SDValue SGN = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, N0, Sz, DAG);
+ // Add (N0 < 0) ? abs2 - 1 : 0;
+ SDValue Amt = DAG.getConstant(EltTy.getSizeInBits() - lg2, MVT::i32);
+ SDValue SRL = getTargetVShiftNode(X86ISD::VSRLI, dl, VT, SGN, Amt, DAG);
+ SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL);
+ SDValue Lg2Amt = DAG.getConstant(lg2, MVT::i32);
+ SDValue SRA = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, ADD, Lg2Amt, DAG);
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (SplatValue.isNonNegative())
+ return SRA;
+
+ SmallVector<SDValue, 16> V(NumElts, DAG.getConstant(0, EltTy));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], NumElts);
+ return DAG.getNode(ISD::SUB, dl, VT, Zero, SRA);
+ }
+ return SDValue();
+}
+
SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
- LLVMContext *Context = DAG.getContext();
if (!Subtarget->hasSSE2())
return SDValue();
@@ -11500,17 +11598,9 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
// Lower SHL with variable shift amount.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
- Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1),
- DAG.getConstant(23, MVT::i32));
-
- const uint32_t CV[] = { 0x3f800000U, 0x3f800000U, 0x3f800000U, 0x3f800000U};
- Constant *C = ConstantDataVector::get(*Context, CV);
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
- SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
+ Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT));
Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
@@ -11519,8 +11609,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
// a = a << 5;
- Op = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, Op.getOperand(1),
- DAG.getConstant(5, MVT::i32));
+ Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, VT));
Op = DAG.getNode(ISD::BITCAST, dl, VT, Op);
// Turn 'a' into a mask suitable for VSELECT
@@ -11952,6 +12041,43 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1), Op.getOperand(2));
}
+SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit());
+
+ // For MacOSX, we want to call an alternative entry point: __sincos_stret,
+ // which returns the values in two XMM registers.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Arg = Op.getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+
+ Entry.Node = Arg;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ // Only optimize x86_64 for now. i386 is a bit messy. For f32,
+ // the small struct {f32, f32} is returned in (eax, edx). For f64,
+ // the results are returned via SRet in memory.
+ const char *LibcallName = (ArgVT == MVT::f64)
+ ? "__sincos_stret" : "__sincosf_stret";
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy,
+ false, false, false, false, 0,
+ CallingConv::C, /*isTaillCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ return CallResult.first;
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -11981,13 +12107,13 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
- case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG);
+ case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
- case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG);
+ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FNEG: return LowerFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
@@ -12033,6 +12159,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ADD: return LowerADD(Op, DAG);
case ISD::SUB: return LowerSUB(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
}
}
@@ -12372,7 +12500,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
- case X86ISD::PALIGN: return "X86ISD::PALIGN";
+ case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
@@ -12783,7 +12911,7 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
MachineFunction::iterator I = MBB;
++I;
- assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 &&
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
"Unexpected number of operands");
assert(MI->hasOneMemOperand() &&
@@ -13015,7 +13143,7 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
MachineFunction::iterator I = MBB;
++I;
- assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 &&
"Unexpected number of operands");
assert(MI->hasOneMemOperand() &&
@@ -15246,13 +15374,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) {
APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
- if (CondRHS.getConstantOperandVal(0) == -A-1) {
- SmallVector<SDValue, 32> V(VT.getVectorNumElements(),
- DAG.getConstant(-A, VT.getScalarType()));
+ if (CondRHS.getConstantOperandVal(0) == -A-1)
return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS,
- DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
- V.data(), V.size()));
- }
+ DAG.getConstant(-A, VT));
}
// Another special case: If C was a sign bit, the sub has been
@@ -15552,7 +15676,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
ConstantSDNode *CmpAgainst = 0;
if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
(CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
- dyn_cast<ConstantSDNode>(Cond.getOperand(0)) == 0) {
+ !isa<ConstantSDNode>(Cond.getOperand(0))) {
if (CC == X86::COND_NE &&
CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
@@ -15832,8 +15956,7 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
if (VT == MVT::f32 || VT == MVT::f64) {
bool ExpectingFlags = false;
// Check for any users that want flags:
- for (SDNode::use_iterator UI = N->use_begin(),
- UE = N->use_end();
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
!ExpectingFlags && UI != UE; ++UI)
switch (UI->getOpcode()) {
default:
@@ -15920,7 +16043,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
- if (VT.getSizeInBits() != 256)
+ if (!VT.is256BitVector())
return SDValue();
assert((N->getOpcode() == ISD::ANY_EXTEND ||
@@ -15929,7 +16052,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
SDValue Narrow = N->getOperand(0);
EVT NarrowVT = Narrow->getValueType(0);
- if (NarrowVT.getSizeInBits() != 128)
+ if (!NarrowVT.is128BitVector())
return SDValue();
if (Narrow->getOpcode() != ISD::XOR &&
@@ -16125,11 +16248,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
DebugLoc DL = N->getDebugLoc();
- // We are going to replace the AND, OR, NAND with either BLEND
- // or PSIGN, which only look at the MSB. The VSRAI instruction
- // does not affect the highest bit, so we can get rid of it.
- Mask = Mask.getOperand(0);
-
// Now we know we at least have a plendvb with the mask val. See if
// we can form a psignb/w/d.
// psign = x.type == y.type == mask.type && y = sub(0, x);
@@ -16138,7 +16256,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
"Unsupported VT for PSIGN");
- Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask);
+ Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
}
// PBLENDVB only available on SSE 4.1
@@ -16296,8 +16414,42 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
EVT MemVT = Ld->getMemoryVT();
DebugLoc dl = Ld->getDebugLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned RegSz = RegVT.getSizeInBits();
ISD::LoadExtType Ext = Ld->getExtensionType();
+ unsigned Alignment = Ld->getAlignment();
+ bool IsAligned = Alignment == 0 || Alignment == MemVT.getSizeInBits()/8;
+
+ // On Sandybridge unaligned 256bit loads are inefficient.
+ if (RegVT.is256BitVector() && !Subtarget->hasInt256() &&
+ !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
+ unsigned NumElems = RegVT.getVectorNumElements();
+ if (NumElems < 2)
+ return SDValue();
+
+ SDValue Ptr = Ld->getBasePtr();
+ SDValue Increment = DAG.getConstant(16, TLI.getPointerTy());
+
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ NumElems/2);
+ SDValue Load1 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ std::max(Alignment/2U, 1U));
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Load1.getValue(1),
+ Load2.getValue(1));
+
+ SDValue NewVec = DAG.getUNDEF(RegVT);
+ NewVec = Insert128BitVector(NewVec, Load1, 0, DAG, dl);
+ NewVec = Insert128BitVector(NewVec, Load2, NumElems/2, DAG, dl);
+ return DCI.CombineTo(N, NewVec, TF, true);
+ }
// If this is a vector EXT Load then attempt to optimize it using a
// shuffle. If SSSE3 is not available we may emit an illegal shuffle but the
@@ -16312,7 +16464,6 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
assert(MemVT.isVector() && "Must load a vector from memory");
unsigned NumElems = RegVT.getVectorNumElements();
- unsigned RegSz = RegVT.getSizeInBits();
unsigned MemSz = MemVT.getSizeInBits();
assert(RegSz > MemSz && "Register size must be greater than the mem size");
@@ -16356,8 +16507,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
// Represent the data using the same element type that is stored in
// memory. In practice, we ''widen'' MemVT.
- EVT WideVecVT =
- EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ EVT WideVecVT =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
loadRegZize/MemVT.getScalarType().getSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
@@ -16426,10 +16577,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
// Build the arithmetic shift.
unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
MemVT.getVectorElementType().getSizeInBits();
- SmallVector<SDValue, 8> C(NumElems,
- DAG.getConstant(Amt, RegVT.getScalarType()));
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, RegVT, &C[0], C.size());
- Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff, BV);
+ Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff,
+ DAG.getConstant(Amt, RegVT));
return DCI.CombineTo(N, Shuff, TF, true);
}
@@ -16462,16 +16611,21 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
DebugLoc dl = St->getDebugLoc();
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned Alignment = St->getAlignment();
+ bool IsAligned = Alignment == 0 || Alignment == VT.getSizeInBits()/8;
// If we are saving a concatenation of two XMM registers, perform two stores.
// On Sandy Bridge, 256-bit memory operations are executed by two
// 128-bit ports. However, on Haswell it is better to issue a single 256-bit
// memory operation.
if (VT.is256BitVector() && !Subtarget->hasInt256() &&
- StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
- StoredVal.getNumOperands() == 2) {
- SDValue Value0 = StoredVal.getOperand(0);
- SDValue Value1 = StoredVal.getOperand(1);
+ StVT == VT && !IsAligned) {
+ unsigned NumElems = VT.getVectorNumElements();
+ if (NumElems < 2)
+ return SDValue();
+
+ SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl);
+ SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl);
SDValue Stride = DAG.getConstant(16, TLI.getPointerTy());
SDValue Ptr0 = St->getBasePtr();
@@ -16479,10 +16633,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0,
St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
+ St->isNonTemporal(), Alignment);
SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
+ St->isNonTemporal(),
+ std::max(Alignment/2U, 1U));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
}
@@ -16917,6 +17072,41 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
+ DebugLoc dl = N->getDebugLoc();
+
+ // The SIGN_EXTEND_INREG to v4i64 is expensive operation on the
+ // both SSE and AVX2 since there is no sign-extended shift right
+ // operation on a vector with 64-bit elements.
+ //(sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) ->
+ // (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT)))
+ if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)) {
+ SDValue N00 = N0.getOperand(0);
+
+ // EXTLOAD has a better solution on AVX2,
+ // it may be replaced with X86ISD::VSEXT node.
+ if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256())
+ if (!ISD::isNormalLoad(N00.getNode()))
+ return SDValue();
+
+ if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
+ SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32,
+ N00, N1);
+ return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
+ }
+ }
+ return SDValue();
+}
+
static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
@@ -17002,7 +17192,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
}
}
- if (VT.isVector() && VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
if (R.getNode())
return R;
@@ -17037,8 +17227,8 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-// Helper function of PerformSETCCCombine. It is to materialize "setb reg"
-// as "sbb reg,reg", since it can be extended without zext and produces
+// Helper function of PerformSETCCCombine. It is to materialize "setb reg"
+// as "sbb reg,reg", since it can be extended without zext and produces
// an all-ones bit which is more useful than 0/1 in some cases.
static SDValue MaterializeSETB(DebugLoc DL, SDValue EFLAGS, SelectionDAG &DAG) {
return DAG.getNode(ISD::AND, DL, MVT::i8,
@@ -17056,13 +17246,13 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
SDValue EFLAGS = N->getOperand(1);
if (CC == X86::COND_A) {
- // Try to convert COND_A into COND_B in an attempt to facilitate
+ // Try to convert COND_A into COND_B in an attempt to facilitate
// materializing "setb reg".
//
// Do not flip "e > c", where "c" is a constant, because Cmp instruction
// cannot take an immediate as its first operand.
//
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
!isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
SDValue NewSub = DAG.getNode(X86ISD::SUB, EFLAGS.getDebugLoc(),
@@ -17270,7 +17460,8 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
if (In.getOpcode() != X86ISD::VZEXT)
return SDValue();
- return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0), In.getOperand(0));
+ return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0),
+ In.getOperand(0));
}
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
@@ -17308,13 +17499,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
+ case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget);
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
- case X86ISD::PALIGN:
+ case X86ISD::PALIGNR:
case X86ISD::UNPCKH:
case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
@@ -17497,7 +17689,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
- std::sort(AsmPieces.begin(), AsmPieces.end());
+ array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (AsmPieces.size() == 4 &&
AsmPieces[0] == "~{cc}" &&
AsmPieces[1] == "~{dirflag}" &&
@@ -17515,7 +17707,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
- std::sort(AsmPieces.begin(), AsmPieces.end());
+ array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (AsmPieces.size() == 4 &&
AsmPieces[0] == "~{cc}" &&
AsmPieces[1] == "~{dirflag}" &&
@@ -17995,7 +18187,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// really want an 8-bit or 32-bit register, map to the appropriate register
// class and return the appropriate register.
if (Res.second == &X86::GR16RegClass) {
- if (VT == MVT::i8) {
+ if (VT == MVT::i8 || VT == MVT::i1) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
@@ -18008,7 +18200,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
Res.first = DestReg;
Res.second = &X86::GR8RegClass;
}
- } else if (VT == MVT::i32) {
+ } else if (VT == MVT::i32 || VT == MVT::f32) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
@@ -18025,7 +18217,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
Res.first = DestReg;
Res.second = &X86::GR32RegClass;
}
- } else if (VT == MVT::i64) {
+ } else if (VT == MVT::i64 || VT == MVT::f64) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;