aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp47
-rw-r--r--lib/Target/X86/X86ISelLowering.h1
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td4
-rw-r--r--lib/Target/X86/X86InstrSSE.td14
4 files changed, 66 insertions, 0 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 42d42da08d..b330a4f980 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -6151,6 +6151,48 @@ static inline unsigned getVPERMILOpcode(EVT VT) {
return 0;
}
+/// isVectorBroadcast - Check if the node chain is suitable to be xformed to
+/// a vbroadcast node. The nodes are suitable whenever we can fold a load coming
+/// from a 32 or 64 bit scalar. Update Op to the desired load to be folded.
+static bool isVectorBroadcast(SDValue &Op) {
+ EVT VT = Op.getValueType();
+ bool Is256 = VT.getSizeInBits() == 256;
+
+ assert((VT.getSizeInBits() == 128 || Is256) &&
+ "Unsupported type for vbroadcast node");
+
+ SDValue V = Op;
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
+ if (Is256 && !(V.hasOneUse() &&
+ V.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V.getOperand(0).getOpcode() == ISD::UNDEF))
+ return false;
+
+ if (Is256)
+ V = V.getOperand(1);
+ if (V.hasOneUse() && V.getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return false;
+
+ // Check the source scalar_to_vector type. 256-bit broadcasts are
+ // supported for 32/64-bit sizes, while 128-bit ones are only supported
+ // for 32-bit scalars.
+ unsigned ScalarSize = V.getOperand(0).getValueType().getSizeInBits();
+ if (ScalarSize != 32 && ScalarSize != 64)
+ return false;
+ if (!Is256 && ScalarSize == 64)
+ return false;
+
+ V = V.getOperand(0);
+ if (!MayFoldLoad(V))
+ return false;
+
+ // Return the load node
+ Op = V;
+ return true;
+}
+
static
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI,
@@ -6174,6 +6216,10 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
return Op;
+ // Use vbroadcast whenever the splat comes from a foldable load
+ if (Subtarget->hasAVX() && isVectorBroadcast(V1))
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, V1);
+
// Handle splats by matching through known shuffle masks
if (VT.is128BitVector() && NumElem <= 4)
return SDValue();
@@ -10189,6 +10235,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
+ case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS";
case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY";
case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD";
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 0aaef2a84d..908157429e 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -276,6 +276,7 @@ namespace llvm {
VPERMILPD,
VPERMILPDY,
VPERM2F128,
+ VBROADCAST,
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
// according to %al. An operator is needed so that this can be expanded
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 2ccf4c4785..e80038e3a0 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -109,6 +109,8 @@ def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
+
def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
@@ -160,6 +162,8 @@ def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>;
def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>;
+def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
+
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 003bd72022..3a75b0e166 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5474,6 +5474,20 @@ def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
(VBROADCASTF128 addr:$src)>;
+def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSSY addr:$src)>;
+def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VBROADCASTSD addr:$src)>;
+def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSSY addr:$src)>;
+def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
+ (VBROADCASTSD addr:$src)>;
+
+def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSS addr:$src)>;
+def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSS addr:$src)>;
+
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//