2 files changed, 53 insertions, 14 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index ea13c88be8..61c5bd14c9 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -8407,22 +8407,29 @@ static SDValue PerformORCombine(SDNode *N,
     unsigned SplatBitSize;
     bool HasAnyUndefs;
 
+    APInt SplatBits0, SplatBits1;
     BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
-    APInt SplatBits0;
+    BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
+    // Ensure that the second operand of both ands are constants
     if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
-                                  HasAnyUndefs) && !HasAnyUndefs) {
-      BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
-      APInt SplatBits1;
-      if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
-                                    HasAnyUndefs) && !HasAnyUndefs &&
-          SplatBits0 == ~SplatBits1) {
-        // Canonicalize the vector type to make instruction selection simpler.
-        EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
-        SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
-                                     N0->getOperand(1), N0->getOperand(0),
-                                     N1->getOperand(0));
-        return DAG.getNode(ISD::BITCAST, dl, VT, Result);
-      }
+                                      HasAnyUndefs) && !HasAnyUndefs) {
+        if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
+                                          HasAnyUndefs) && !HasAnyUndefs) {
+            // Ensure that the bit width of the constants are the same and that
+            // the splat arguments are logical inverses as per the pattern we
+            // are trying to simplify.
+            if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
+                SplatBits0 == ~SplatBits1) {
+                // Canonicalize the vector type to make instruction selection
+                // simpler.
+                EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+                SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
+                                             N0->getOperand(1),
+                                             N0->getOperand(0),
+                                             N1->getOperand(0));
+                return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+            }
+        }
     }
   }
 
diff --git a/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll b/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll
new file mode 100644
index 0000000000..a438c1f455
--- /dev/null
+++ b/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7--linux-gnueabi"
+
+; CHECK-LABEL: function
+define void @function() {
+; CHECK: cmp r0, #0
+; CHECK: bxne lr
+; CHECK: vmov.i32 q8, #0xff0000
+entry:
+  br i1 undef, label %vector.body, label %for.end
+
+; CHECK: vld1.32 {d18, d19}, [r0]
+; CHECK: vand q10, q9, q8
+; CHECK: vbic.i16 q9, #0xff
+; CHECK: vorr q9, q9, q10
+; CHECK: vst1.32 {d18, d19}, [r0]
+vector.body:
+  %wide.load = load <4 x i32>* undef, align 4
+  %0 = and <4 x i32> %wide.load, <i32 -16711936, i32 -16711936, i32 -16711936, i32 -16711936>
+  %1 = sub <4 x i32> %wide.load, zeroinitializer
+  %2 = and <4 x i32> %1, <i32 16711680, i32 16711680, i32 16711680, i32 16711680>
+  %3 = or <4 x i32> undef, %0
+  %4 = or <4 x i32> %3, %2
+  store <4 x i32> %4, <4 x i32>* undef, align 4
+  br label %vector.body
+
+for.end:
+  ret void
+}
+