diff options
author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2012-01-24 13:54:13 +0000 |
---|---|---|
committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2012-01-24 13:54:13 +0000 |
commit | 28d7e71a3021d55dce21005112f444facbda0398 (patch) | |
tree | 8053b24002ccb0ee763e97e33ff35fb505807b40 | |
parent | 1ee0ecf84a07693c3a517ba030fac8ac1f9f3fbc (diff) | |
download | external_llvm-28d7e71a3021d55dce21005112f444facbda0398.tar.gz external_llvm-28d7e71a3021d55dce21005112f444facbda0398.tar.bz2 external_llvm-28d7e71a3021d55dce21005112f444facbda0398.zip |
Optimize the ZERO_EXTEND operation for AVX.
The v8i16 -> v8i32 and v4i32 -> v4i64 zero extensions are now lowered using vpunpck* instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148803 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 39 | ||||
-rwxr-xr-x | test/CodeGen/X86/avx-zext.ll | 17 |
2 files changed, 54 insertions, 2 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2655ce9610..dc1fd7fc40 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -14349,7 +14349,8 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { // (i32 zext (and (i8 x86isd::setcc_carry), 1)) -> // (and (i32 x86isd::setcc_carry), 1) // This eliminates the zext. This transformation is necessary because @@ -14357,6 +14358,8 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) { DebugLoc dl = N->getDebugLoc(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); + if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && N0.getOperand(0).hasOneUse()) { @@ -14371,6 +14374,38 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) { N00.getOperand(0), N00.getOperand(1)), DAG.getConstant(1, VT)); } + // Optimize vectors in AVX mode: + // + // v8i16 -> v8i32 + // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32. + // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32. + // Concat upper and lower parts. + // + // v4i32 -> v4i64 + // Use vpunpckldq for 4 lower elements v4i32 -> v2i64. + // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64. + // Concat upper and lower parts. 
+ // + if (Subtarget->hasAVX()) { + + if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) || + ((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) { + + SDValue ZeroVec = getZeroVector(OpVT, Subtarget->hasSSE2(), Subtarget->hasAVX2(), + DAG, dl); + SDValue OpLo = getTargetShuffleNode(X86ISD::UNPCKL, dl, OpVT, N0, ZeroVec, DAG); + SDValue OpHi = getTargetShuffleNode(X86ISD::UNPCKH, dl, OpVT, N0, ZeroVec, DAG); + + EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + VT.getVectorNumElements()/2); + + OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo); + OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); + } + } + return SDValue(); } @@ -14558,7 +14593,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FAND: return PerformFANDCombine(N, DAG); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); - case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG); + case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, Subtarget); case X86ISD::SETCC: return PerformSETCCCombine(N, DAG); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::PALIGN: diff --git a/test/CodeGen/X86/avx-zext.ll b/test/CodeGen/X86/avx-zext.ll new file mode 100755 index 0000000000..795a7b3dd4 --- /dev/null +++ b/test/CodeGen/X86/avx-zext.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { +;CHECK: zext_8i16_to_8i32 +;CHECK: vpunpckhwd + + %B = zext <8 x i16> %A to <8 x i32> + ret <8 x i32>%B +} + +define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { +;CHECK: zext_4i32_to_4i64 +;CHECK: vpunpckhdq + + %B = zext <4 x i32> %A to <4 x i64> + ret <4 x i64>%B +} |