aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp6
-rw-r--r--lib/Target/X86/X86InstrCompiler.td17
-rw-r--r--test/CodeGen/X86/clz.ll47
3 files changed, 49 insertions, 21 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6b23d6104f..95c99e1dc2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -380,9 +380,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i16 , Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i64 , Expand);
if (Subtarget->hasBMI()) {
setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
} else {
@@ -394,9 +391,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i8 , Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i16 , Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i64 , Expand);
if (Subtarget->hasLZCNT()) {
setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
} else {
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index e0cf669bee..f593092df7 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -1753,3 +1753,20 @@ def : Pat<(and GR64:$src1, i64immSExt8:$src2),
(AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(and GR64:$src1, i64immSExt32:$src2),
(AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Bit scan instruction patterns to match explicit zero-undef behavior.
+def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>;
+def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>;
+def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr GR64:$src)>;
+def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>;
+def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>;
+def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>;
+def : Pat<(ctlz_zero_undef GR16:$src), (XOR16ri (BSR16rr GR16:$src), 15)>;
+def : Pat<(ctlz_zero_undef GR32:$src), (XOR32ri (BSR32rr GR32:$src), 31)>;
+def : Pat<(ctlz_zero_undef GR64:$src), (XOR64ri8 (BSR64rr GR64:$src), 63)>;
+def : Pat<(ctlz_zero_undef (loadi16 addr:$src)),
+ (XOR16ri (BSR16rm addr:$src), 15)>;
+def : Pat<(ctlz_zero_undef (loadi32 addr:$src)),
+ (XOR32ri (BSR32rm addr:$src), 31)>;
+def : Pat<(ctlz_zero_undef (loadi64 addr:$src)),
+ (XOR64ri8 (BSR64rm addr:$src), 63)>;
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index 9b26efd10d..4e080309b6 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -1,48 +1,65 @@
; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
define i32 @t1(i32 %x) nounwind {
- %tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 true )
- ret i32 %tmp
+ %tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 true )
+ ret i32 %tmp
; CHECK: t1:
; CHECK: bsrl
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: xorl $31,
+; CHECK: ret
}
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
define i32 @t2(i32 %x) nounwind {
- %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
- ret i32 %tmp
+ %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
+ ret i32 %tmp
; CHECK: t2:
; CHECK: bsfl
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
}
declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
define i16 @t3(i16 %x, i16 %y) nounwind {
entry:
- %tmp1 = add i16 %x, %y
- %tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1, i1 true ) ; <i16> [#uses=1]
- ret i16 %tmp2
+ %tmp1 = add i16 %x, %y
+ %tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1, i1 true ) ; <i16> [#uses=1]
+ ret i16 %tmp2
; CHECK: t3:
; CHECK: bsrw
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: xorw $15,
+; CHECK: ret
}
declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
-; Don't generate the cmovne when the source is known non-zero (and bsr would
-; not set ZF).
-; rdar://9490949
-
define i32 @t4(i32 %n) nounwind {
entry:
+; Generate a cmov to handle zero inputs when necessary.
; CHECK: t4:
; CHECK: bsrl
+; CHECK: cmov
+; CHECK: xorl $31,
+; CHECK: ret
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+ ret i32 %tmp1
+}
+
+define i32 @t5(i32 %n) nounwind {
+entry:
+; Don't generate the cmovne when the source is known non-zero (and bsr would
+; not set ZF).
+; rdar://9490949
+; CHECK: t5:
+; CHECK: bsrl
; CHECK-NOT: cmov
+; CHECK: xorl $31,
; CHECK: ret
%or = or i32 %n, 1
- %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or, i1 true)
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or, i1 false)
ret i32 %tmp1
}