diff options
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 35 | ||||
-rw-r--r-- | test/CodeGen/ARM/2013-07-29-vector-or-combine.ll | 32 |
2 files changed, 53 insertions, 14 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ea13c88be8..61c5bd14c9 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -8407,22 +8407,29 @@ static SDValue PerformORCombine(SDNode *N, unsigned SplatBitSize; bool HasAnyUndefs; + APInt SplatBits0, SplatBits1; BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); - APInt SplatBits0; + BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); + // Ensure that the second operand of both ands are constants if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs) { - BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); - APInt SplatBits1; - if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs && - SplatBits0 == ~SplatBits1) { - // Canonicalize the vector type to make instruction selection simpler. - EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; - SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, - N0->getOperand(1), N0->getOperand(0), - N1->getOperand(0)); - return DAG.getNode(ISD::BITCAST, dl, VT, Result); - } + HasAnyUndefs) && !HasAnyUndefs) { + if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, + HasAnyUndefs) && !HasAnyUndefs) { + // Ensure that the bit width of the constants are the same and that + // the splat arguments are logical inverses as per the pattern we + // are trying to simplify. + if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && + SplatBits0 == ~SplatBits1) { + // Canonicalize the vector type to make instruction selection + // simpler. + EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; + SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, + N0->getOperand(1), + N0->getOperand(0), + N1->getOperand(0)); + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } + } } } diff --git a/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll b/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll new file mode 100644 index 0000000000..a438c1f455 --- /dev/null +++ b/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7--linux-gnueabi" + +; CHECK-LABEL: function +define void @function() { +; CHECK: cmp r0, #0 +; CHECK: bxne lr +; CHECK: vmov.i32 q8, #0xff0000 +entry: + br i1 undef, label %vector.body, label %for.end + +; CHECK: vld1.32 {d18, d19}, [r0] +; CHECK: vand q10, q9, q8 +; CHECK: vbic.i16 q9, #0xff +; CHECK: vorr q9, q9, q10 +; CHECK: vst1.32 {d18, d19}, [r0] +vector.body: + %wide.load = load <4 x i32>* undef, align 4 + %0 = and <4 x i32> %wide.load, <i32 -16711936, i32 -16711936, i32 -16711936, i32 -16711936> + %1 = sub <4 x i32> %wide.load, zeroinitializer + %2 = and <4 x i32> %1, <i32 16711680, i32 16711680, i32 16711680, i32 16711680> + %3 = or <4 x i32> undef, %0 + %4 = or <4 x i32> %3, %2 + store <4 x i32> %4, <4 x i32>* undef, align 4 + br label %vector.body + +for.end: + ret void +} + |