diff options
Diffstat (limited to 'test/CodeGen')
130 files changed, 5007 insertions, 530 deletions
diff --git a/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll b/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll new file mode 100644 index 0000000000..f7adf73263 --- /dev/null +++ b/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -mtriple=arm-unknown-linux-gnueabi + +define void @"java.lang.String::getChars"([84 x i8]* %method, i32 %base_pc, [788 x i8]* %thread) { + %1 = load i32* undef ; <i32> [#uses=1] + %2 = sub i32 %1, 48 ; <i32> [#uses=1] + br i1 undef, label %stack_overflow, label %no_overflow + +stack_overflow: ; preds = %0 + unreachable + +no_overflow: ; preds = %0 + %frame = inttoptr i32 %2 to [17 x i32]* ; <[17 x i32]*> [#uses=4] + %3 = load i32* undef ; <i32> [#uses=1] + %4 = load i32* null ; <i32> [#uses=1] + %5 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 13 ; <i32*> [#uses=1] + %6 = bitcast i32* %5 to [8 x i8]** ; <[8 x i8]**> [#uses=1] + %7 = load [8 x i8]** %6 ; <[8 x i8]*> [#uses=1] + %8 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 12 ; <i32*> [#uses=1] + %9 = load i32* %8 ; <i32> [#uses=1] + br i1 undef, label %bci_13, label %bci_4 + +bci_13: ; preds = %no_overflow + br i1 undef, label %bci_30, label %bci_21 + +bci_30: ; preds = %bci_13 + br i1 undef, label %bci_46, label %bci_35 + +bci_46: ; preds = %bci_30 + %10 = sub i32 %4, %3 ; <i32> [#uses=1] + %11 = load [8 x i8]** null ; <[8 x i8]*> [#uses=1] + %callee = bitcast [8 x i8]* %11 to [84 x i8]* ; <[84 x i8]*> [#uses=1] + %12 = bitcast i8* undef to i32* ; <i32*> [#uses=1] + %base_pc7 = load i32* %12 ; <i32> [#uses=2] + %13 = add i32 %base_pc7, 0 ; <i32> [#uses=1] + %14 = inttoptr i32 %13 to void ([84 x i8]*, i32, [788 x i8]*)** ; <void ([84 x i8]*, i32, [788 x i8]*)**> [#uses=1] + %entry_point = load void ([84 x i8]*, i32, [788 x i8]*)** %14 ; <void ([84 x i8]*, i32, [788 x i8]*)*> [#uses=1] + %15 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 1 ; <i32*> [#uses=1] + %16 = ptrtoint i32* %15 to i32 ; <i32> [#uses=1] + %stack_pointer_addr9 = 
bitcast i8* undef to i32* ; <i32*> [#uses=1] + store i32 %16, i32* %stack_pointer_addr9 + %17 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 2 ; <i32*> [#uses=1] + store i32 %9, i32* %17 + store i32 %10, i32* undef + store [84 x i8]* %method, [84 x i8]** undef + %18 = add i32 %base_pc, 20 ; <i32> [#uses=1] + store i32 %18, i32* undef + store [8 x i8]* %7, [8 x i8]** undef + call void %entry_point([84 x i8]* %callee, i32 %base_pc7, [788 x i8]* %thread) + br i1 undef, label %no_exception, label %exception + +exception: ; preds = %bci_46 + ret void + +no_exception: ; preds = %bci_46 + ret void + +bci_35: ; preds = %bci_30 + ret void + +bci_21: ; preds = %bci_13 + ret void + +bci_4: ; preds = %no_overflow + ret void +} diff --git a/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll b/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll new file mode 100644 index 0000000000..b0b4cb37d1 --- /dev/null +++ b/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll @@ -0,0 +1,54 @@ +; RUN: llc < %s -march=arm + +define void @"java.lang.String::getChars"([84 x i8]* %method, i32 %base_pc, [788 x i8]* %thread) { + %1 = sub i32 undef, 48 ; <i32> [#uses=1] + br i1 undef, label %stack_overflow, label %no_overflow + +stack_overflow: ; preds = %0 + unreachable + +no_overflow: ; preds = %0 + %frame = inttoptr i32 %1 to [17 x i32]* ; <[17 x i32]*> [#uses=4] + %2 = load i32* null ; <i32> [#uses=2] + %3 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 14 ; <i32*> [#uses=1] + %4 = load i32* %3 ; <i32> [#uses=2] + %5 = load [8 x i8]** undef ; <[8 x i8]*> [#uses=2] + br i1 undef, label %bci_13, label %bci_4 + +bci_13: ; preds = %no_overflow + br i1 undef, label %bci_30, label %bci_21 + +bci_30: ; preds = %bci_13 + %6 = icmp sle i32 %2, %4 ; <i1> [#uses=1] + br i1 %6, label %bci_46, label %bci_35 + +bci_46: ; preds = %bci_30 + store [84 x i8]* %method, [84 x i8]** undef + br i1 false, label %no_exception, label %exception + +exception: ; preds = %bci_46 + ret void + +no_exception: ; preds = 
%bci_46 + ret void + +bci_35: ; preds = %bci_30 + %7 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 15 ; <i32*> [#uses=1] + store i32 %2, i32* %7 + %8 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 14 ; <i32*> [#uses=1] + store i32 %4, i32* %8 + %9 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 13 ; <i32*> [#uses=1] + %10 = bitcast i32* %9 to [8 x i8]** ; <[8 x i8]**> [#uses=1] + store [8 x i8]* %5, [8 x i8]** %10 + call void inttoptr (i32 13839116 to void ([788 x i8]*, i32)*)([788 x i8]* %thread, i32 7) + ret void + +bci_21: ; preds = %bci_13 + ret void + +bci_4: ; preds = %no_overflow + store [8 x i8]* %5, [8 x i8]** undef + store i32 undef, i32* undef + call void inttoptr (i32 13839116 to void ([788 x i8]*, i32)*)([788 x i8]* %thread, i32 7) + ret void +} diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll index 72ec8efcc4..52ab8717c1 100644 --- a/test/CodeGen/ARM/arm-negative-stride.ll +++ b/test/CodeGen/ARM/arm-negative-stride.ll @@ -1,7 +1,32 @@ ; RUN: llc < %s -march=arm | FileCheck %s +; This loop is rewritten with an indvar which counts down, which +; frees up a register from holding the trip count. + define void @test(i32* %P, i32 %A, i32 %i) nounwind { entry: +; CHECK: str r1, [{{r.*}}, +{{r.*}}, lsl #2] + icmp eq i32 %i, 0 ; <i1>:0 [#uses=1] + br i1 %0, label %return, label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] + %i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1] + %tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1] + store i32 %A, i32* %tmp2 + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1] + br i1 %1, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +; This loop has a non-address use of the count-up indvar, so +; it'll remain. Now the original store uses a negative-stride address. 
+ +define void @test_with_forced_iv(i32* %P, i32 %A, i32 %i) nounwind { +entry: ; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2] icmp eq i32 %i, 0 ; <i1>:0 [#uses=1] br i1 %0, label %return, label %bb @@ -11,6 +36,7 @@ bb: ; preds = %bb, %entry %i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1] %tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1] store i32 %A, i32* %tmp2 + store i32 %indvar, i32* null %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1] br i1 %1, label %return, label %bb diff --git a/test/CodeGen/ARM/armv4.ll b/test/CodeGen/ARM/armv4.ll new file mode 100644 index 0000000000..49b129dabd --- /dev/null +++ b/test/CodeGen/ARM/armv4.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=arm-unknown-eabi | FileCheck %s -check-prefix=THUMB +; RUN: llc < %s -mtriple=arm-unknown-eabi -mcpu=strongarm | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -mtriple=arm-unknown-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=THUMB +; RUN: llc < %s -mtriple=arm-unknown-eabi -mattr=+v6 | FileCheck %s -check-prefix=THUMB +; RUN: llc < %s -mtriple=armv4-unknown-eabi | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -mtriple=armv4t-unknown-eabi | FileCheck %s -check-prefix=THUMB + +define arm_aapcscc i32 @test(i32 %a) nounwind readnone { +entry: +; ARM: mov pc +; THUMB: bx + ret i32 %a +} diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll index 3dd66ae71d..c60b75b574 100644 --- a/test/CodeGen/ARM/call.ll +++ b/test/CodeGen/ARM/call.ll @@ -20,3 +20,17 @@ define void @g.upgrd.1() { %tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0] ret void } + +define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind { +; CHECKV4: m_231b +; CHECKV4: bx r{{.*}} +BB0: + %5 = inttoptr i32 %0 to i32* ; <i32*> [#uses=1] + %t35 = volatile load i32* %5 ; <i32> [#uses=1] + %6 = inttoptr i32 %t35 to i32** ; <i32**> [#uses=1] + %7 = getelementptr i32** %6, i32 86 ; <i32**> [#uses=1] + %8 = load i32** %7 ; <i32*> [#uses=1] + %9 = 
bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; <i32* (i32, i32*, i32, i32*, i32*, i32*)*> [#uses=1] + %10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; <i32*> [#uses=1] + ret i32* %10 +} diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll index 507ec2c7bd..1bbb96deee 100644 --- a/test/CodeGen/ARM/lsr-code-insertion.ll +++ b/test/CodeGen/ARM/lsr-code-insertion.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -stats |& grep {40.*Number of machine instrs printed} -; RUN: llc < %s -stats |& grep {.*Number of re-materialization} +; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed} +; RUN: llc < %s -stats |& not grep {.*Number of re-materialization} ; This test really wants to check that the resultant "cond_true" block only ; has a single store in it, and that cond_true55 only has code to materialize ; the constant and do a store. We do *not* want something like this: diff --git a/test/CodeGen/ARM/neon_minmax.ll b/test/CodeGen/ARM/neon_minmax.ll new file mode 100644 index 0000000000..d301c6a4ca --- /dev/null +++ b/test/CodeGen/ARM/neon_minmax.ll @@ -0,0 +1,81 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s + +define float @fmin_ole(float %x) nounwind { +;CHECK: fmin_ole: +;CHECK: vmin.f32 + %cond = fcmp ole float 1.0, %x + %min1 = select i1 %cond, float 1.0, float %x + ret float %min1 +} + +define float @fmin_ole_zero(float %x) nounwind { +;CHECK: fmin_ole_zero: +;CHECK-NOT: vmin.f32 + %cond = fcmp ole float 0.0, %x + %min1 = select i1 %cond, float 0.0, float %x + ret float %min1 +} + +define float @fmin_ult(float %x) nounwind { +;CHECK: fmin_ult: +;CHECK: vmin.f32 + %cond = fcmp ult float %x, 1.0 + %min1 = select i1 %cond, float %x, float 1.0 + ret float %min1 +} + +define float @fmax_ogt(float %x) nounwind { +;CHECK: fmax_ogt: +;CHECK: vmax.f32 + %cond = fcmp ogt float 1.0, %x + %max1 = select i1 %cond, float 1.0, float %x + ret float %max1 +} + +define float 
@fmax_uge(float %x) nounwind { +;CHECK: fmax_uge: +;CHECK: vmax.f32 + %cond = fcmp uge float %x, 1.0 + %max1 = select i1 %cond, float %x, float 1.0 + ret float %max1 +} + +define float @fmax_uge_zero(float %x) nounwind { +;CHECK: fmax_uge_zero: +;CHECK-NOT: vmax.f32 + %cond = fcmp uge float %x, 0.0 + %max1 = select i1 %cond, float %x, float 0.0 + ret float %max1 +} + +define float @fmax_olt_reverse(float %x) nounwind { +;CHECK: fmax_olt_reverse: +;CHECK: vmax.f32 + %cond = fcmp olt float %x, 1.0 + %max1 = select i1 %cond, float 1.0, float %x + ret float %max1 +} + +define float @fmax_ule_reverse(float %x) nounwind { +;CHECK: fmax_ule_reverse: +;CHECK: vmax.f32 + %cond = fcmp ult float 1.0, %x + %max1 = select i1 %cond, float %x, float 1.0 + ret float %max1 +} + +define float @fmin_oge_reverse(float %x) nounwind { +;CHECK: fmin_oge_reverse: +;CHECK: vmin.f32 + %cond = fcmp oge float %x, 1.0 + %min1 = select i1 %cond, float 1.0, float %x + ret float %min1 +} + +define float @fmin_ugt_reverse(float %x) nounwind { +;CHECK: fmin_ugt_reverse: +;CHECK: vmin.f32 + %cond = fcmp ugt float 1.0, %x + %min1 = select i1 %cond, float %x, float 1.0 + ret float %min1 +} diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll index 367f782e0f..92c1cf1821 100644 --- a/test/CodeGen/ARM/remat.ll +++ b/test/CodeGen/ARM/remat.ll @@ -32,16 +32,16 @@ bb.i35: ; preds = %bb142 br label %phi1.exit phi1.exit: ; preds = %bb.i35, %bb142 - %.pn = phi double [ %6, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=0] + %.pn = phi double [ %6, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=1] %9 = phi double [ %8, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=1] - %10 = fmul double undef, %9 ; <double> [#uses=0] + %10 = fmul double %.pn, %9 ; <double> [#uses=1] br i1 %14, label %phi0.exit, label %bb.i bb.i: ; preds = %phi1.exit unreachable phi0.exit: ; preds = %phi1.exit - %11 = fsub double %4, undef ; <double> [#uses=1] + %11 = fsub double %4, %10 ; <double> [#uses=1] 
%12 = fadd double 0.000000e+00, %11 ; <double> [#uses=1] store double %12, double* undef, align 4 br label %bb142 diff --git a/test/CodeGen/Alpha/add.ll b/test/CodeGen/Alpha/add.ll index 24a74188f8..cd883f64a6 100644 --- a/test/CodeGen/Alpha/add.ll +++ b/test/CodeGen/Alpha/add.ll @@ -4,9 +4,8 @@ ; RUN: grep { addl} %t.s | count 2 ; RUN: grep { addq} %t.s | count 2 ; RUN: grep { subl} %t.s | count 2 -; RUN: grep { subq} %t.s | count 1 +; RUN: grep { subq} %t.s | count 2 ; -; RUN: grep {lda \$0,-100(\$16)} %t.s | count 1 ; RUN: grep {s4addl} %t.s | count 2 ; RUN: grep {s8addl} %t.s | count 2 ; RUN: grep {s4addq} %t.s | count 2 diff --git a/test/CodeGen/Blackfin/promote-logic.ll b/test/CodeGen/Blackfin/promote-logic.ll index 46da56681d..1ac1408290 100644 --- a/test/CodeGen/Blackfin/promote-logic.ll +++ b/test/CodeGen/Blackfin/promote-logic.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=bfin > %t -; XFAIL: * +; RUN: llc < %s -march=bfin ; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR ; operation after LegalizeOps. 
diff --git a/test/CodeGen/CellSPU/bss.ll b/test/CodeGen/CellSPU/bss.ll new file mode 100644 index 0000000000..05a0f50039 --- /dev/null +++ b/test/CodeGen/CellSPU/bss.ll @@ -0,0 +1,5 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep "\.section" %t1.s | grep "\.bss" | count 1 + +@bssVar = global i32 zeroinitializer + diff --git a/test/CodeGen/Generic/2007-05-05-Personality.ll b/test/CodeGen/Generic/2007-05-05-Personality.ll index 27493261d5..c92783e5e4 100644 --- a/test/CodeGen/Generic/2007-05-05-Personality.ll +++ b/test/CodeGen/Generic/2007-05-05-Personality.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i686-pc-linux-gnu -enable-eh -o - | grep zPLR +; RUN: llc < %s -mtriple=i686-pc-linux-gnu -enable-eh -o - | grep zPL @error = external global i8 ; <i8*> [#uses=2] diff --git a/test/CodeGen/Generic/GC/argpromotion.ll b/test/CodeGen/Generic/GC/argpromotion.ll index dda376d616..c63ce222b8 100644 --- a/test/CodeGen/Generic/GC/argpromotion.ll +++ b/test/CodeGen/Generic/GC/argpromotion.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -anders-aa -argpromotion +; RUN: opt < %s -argpromotion declare void @llvm.gcroot(i8**, i8*) diff --git a/test/CodeGen/Generic/debug-info.ll b/test/CodeGen/Generic/debug-info.ll deleted file mode 100644 index 20d9f913c1..0000000000 --- a/test/CodeGen/Generic/debug-info.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: llc < %s - - %lldb.compile_unit = type { i32, i16, i16, i8*, i8*, i8*, { }* } -@d.compile_unit7 = external global %lldb.compile_unit ; <%lldb.compile_unit*> [#uses=1] - -declare void @llvm.dbg.stoppoint(i32, i32, %lldb.compile_unit*) - -define void @rb_raise(i32, ...) 
{ -entry: - br i1 false, label %strlen.exit, label %no_exit.i - -no_exit.i: ; preds = %entry - ret void - -strlen.exit: ; preds = %entry - call void @llvm.dbg.stoppoint( i32 4358, i32 0, %lldb.compile_unit* @d.compile_unit7 ) - unreachable -} - diff --git a/test/CodeGen/MBlaze/brind.ll b/test/CodeGen/MBlaze/brind.ll new file mode 100644 index 0000000000..7798e0f56a --- /dev/null +++ b/test/CodeGen/MBlaze/brind.ll @@ -0,0 +1,73 @@ +; Ensure that the select instruction is supported and is lowered to +; some sort of branch instruction. +; +; RUN: llc < %s -march=mblaze -mattr=+mul,+fpu,+barrel | FileCheck %s + +declare i32 @printf(i8*, ...) +@MSG = internal constant [13 x i8] c"Message: %d\0A\00" + +@BLKS = private constant [5 x i8*] + [ i8* blockaddress(@brind, %L1), + i8* blockaddress(@brind, %L2), + i8* blockaddress(@brind, %L3), + i8* blockaddress(@brind, %L4), + i8* blockaddress(@brind, %L5) ] + +define i32 @brind(i32 %a, i32 %b) +{ + ; CHECK: brind: +entry: + br label %loop + +loop: + %tmp.0 = phi i32 [ 0, %entry ], [ %tmp.8, %finish ] + %dst.0 = getelementptr [5 x i8*]* @BLKS, i32 0, i32 %tmp.0 + %dst.1 = load i8** %dst.0 + indirectbr i8* %dst.1, [ label %L1, + label %L2, + label %L3, + label %L4, + label %L5 ] + ; CHECK: br {{r[0-9]*}} + +L1: + %tmp.1 = add i32 %a, %b + br label %finish + ; CHECK: br + +L2: + %tmp.2 = sub i32 %a, %b + br label %finish + ; CHECK: br + +L3: + %tmp.3 = mul i32 %a, %b + br label %finish + ; CHECK: br + +L4: + %tmp.4 = sdiv i32 %a, %b + br label %finish + ; CHECK: br + +L5: + %tmp.5 = srem i32 %a, %b + br label %finish + ; CHECK: br + +finish: + %tmp.6 = phi i32 [ %tmp.1, %L1 ], + [ %tmp.2, %L2 ], + [ %tmp.3, %L3 ], + [ %tmp.4, %L4 ], + [ %tmp.5, %L5 ] + + call i32 (i8*,...)* @printf( i8* getelementptr([13 x i8]* @MSG,i32 0,i32 0), + i32 %tmp.6) + + %tmp.7 = add i32 %tmp.0, 1 + %tmp.8 = urem i32 %tmp.7, 5 + + br label %loop + ; CHECK: br +} diff --git a/test/CodeGen/MBlaze/callind.ll b/test/CodeGen/MBlaze/callind.ll new file mode 
100644 index 0000000000..bfc8d001fd --- /dev/null +++ b/test/CodeGen/MBlaze/callind.ll @@ -0,0 +1,80 @@ +; Ensure that indirect calls work and that they are lowered to some +; sort of branch and link instruction. +; +; RUN: llc < %s -march=mblaze -mattr=+mul,+fpu,+barrel | FileCheck %s + +declare i32 @printf(i8*, ...) +@MSG = internal constant [13 x i8] c"Message: %d\0A\00" + +@FUNS = private constant [5 x i32 (i32,i32)*] + [ i32 (i32,i32)* @doadd, + i32 (i32,i32)* @dosub, + i32 (i32,i32)* @domul, + i32 (i32,i32)* @dodiv, + i32 (i32,i32)* @dorem ] + +define i32 @doadd(i32 %a, i32 %b) +{ + ; CHECK: doadd: + %tmp.0 = add i32 %a, %b + ret i32 %tmp.0 + ; CHECK: rtsd +} + +define i32 @dosub(i32 %a, i32 %b) +{ + ; CHECK: dosub: + %tmp.0 = sub i32 %a, %b + ret i32 %tmp.0 + ; CHECK: rtsd +} + +define i32 @domul(i32 %a, i32 %b) +{ + ; CHECK: domul: + %tmp.0 = mul i32 %a, %b + ret i32 %tmp.0 + ; CHECK: rtsd +} + +define i32 @dodiv(i32 %a, i32 %b) +{ + ; CHECK: dodiv: + %tmp.0 = sdiv i32 %a, %b + ret i32 %tmp.0 + ; CHECK: rtsd +} + +define i32 @dorem(i32 %a, i32 %b) +{ + ; CHECK: dorem: + %tmp.0 = srem i32 %a, %b + ret i32 %tmp.0 + ; CHECK: rtsd +} + +define i32 @callind(i32 %a, i32 %b) +{ + ; CHECK: callind: +entry: + br label %loop + +loop: + %tmp.0 = phi i32 [ 0, %entry ], [ %tmp.3, %loop ] + %dst.0 = getelementptr [5 x i32 (i32,i32)*]* @FUNS, i32 0, i32 %tmp.0 + %dst.1 = load i32 (i32,i32)** %dst.0 + %tmp.1 = call i32 %dst.1(i32 %a, i32 %b) + ; CHECK-NOT: brli + ; CHECK-NOT: brlai + ; CHECK: brl + + call i32 (i8*,...)* @printf( i8* getelementptr([13 x i8]* @MSG,i32 0,i32 0), + i32 %tmp.1) + ; CHECK: brl + + %tmp.2 = add i32 %tmp.0, 1 + %tmp.3 = urem i32 %tmp.2, 5 + + br label %loop + ; CHECK: br +} diff --git a/test/CodeGen/MBlaze/cc.ll b/test/CodeGen/MBlaze/cc.ll new file mode 100644 index 0000000000..aaa918ffc3 --- /dev/null +++ b/test/CodeGen/MBlaze/cc.ll @@ -0,0 +1,315 @@ +; Test some of the calling convention lowering done by the MBlaze backend. 
+; We test that integer values are passed in the correct registers and +; returned in the correct registers. Additionally, we test that the stack +; is used as appropriate for passing arguments that cannot be placed into +; registers. +; +; RUN: llc < %s -march=mblaze | FileCheck %s + +declare i32 @printf(i8*, ...) +@MSG = internal constant [13 x i8] c"Message: %d\0A\00" + +define void @params0_noret() { + ; CHECK: params0_noret: + ret void + ; CHECK-NOT: {{.* r3, r0, 1}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i8 @params0_8bitret() { + ; CHECK: params0_8bitret: + ret i8 1 + ; CHECK: {{.* r3, r0, 1}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i16 @params0_16bitret() { + ; CHECK: params0_16bitret: + ret i16 1 + ; CHECK: {{.* r3, r0, 1}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params0_32bitret() { + ; CHECK: params0_32bitret: + ret i32 1 + ; CHECK: {{.* r3, r0, 1}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i64 @params0_64bitret() { + ; CHECK: params0_64bitret: + ret i64 1 + ; CHECK: {{.* r3, r0, .*}} + ; CHECK: {{.* r4, r0, 1}} + ; CHECK: rtsd +} + +define i32 @params1_32bitret(i32 %a) { + ; CHECK: params1_32bitret: + ret i32 %a + ; CHECK: {{.* r3, r5, r0}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params2_32bitret(i32 %a, i32 %b) { + ; CHECK: params2_32bitret: + ret i32 %b + ; CHECK: {{.* r3, r6, r0}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params3_32bitret(i32 %a, i32 %b, i32 %c) { + ; CHECK: params3_32bitret: + ret i32 %c + ; CHECK: {{.* r3, r7, r0}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params4_32bitret(i32 %a, i32 %b, i32 %c, i32 %d) { + ; CHECK: params4_32bitret: + ret i32 %d + ; CHECK: {{.* r3, r8, r0}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params5_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { + ; CHECK: params5_32bitret: + ret i32 %e + ; CHECK: {{.* r3, r9, 
r0}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params6_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) { + ; CHECK: params6_32bitret: + ret i32 %f + ; CHECK: {{.* r3, r10, r0}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params7_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, + i32 %g) { + ; CHECK: params7_32bitret: + ret i32 %g + ; CHECK: {{lwi? r3, r1, 32}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params8_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, + i32 %g, i32 %h) { + ; CHECK: params8_32bitret: + ret i32 %h + ; CHECK: {{lwi? r3, r1, 36}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params9_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, + i32 %g, i32 %h, i32 %i) { + ; CHECK: params9_32bitret: + ret i32 %i + ; CHECK: {{lwi? r3, r1, 40}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params10_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, + i32 %g, i32 %h, i32 %i, i32 %j) { + ; CHECK: params10_32bitret: + ret i32 %j + ; CHECK: {{lwi? 
r3, r1, 44}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define void @testing() { + %MSG.1 = getelementptr [13 x i8]* @MSG, i32 0, i32 0 + + call void @params0_noret() + ; CHECK: brlid + + %tmp.1 = call i8 @params0_8bitret() + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i8 %tmp.1) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.2 = call i16 @params0_16bitret() + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i16 %tmp.2) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.3 = call i32 @params0_32bitret() + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.3) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.4 = call i64 @params0_64bitret() + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i64 %tmp.4) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK: {{.* r7, r4, r0}} + ; CHECK: brlid + + %tmp.5 = call i32 @params1_32bitret(i32 1) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.5) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.6 = call i32 @params2_32bitret(i32 1, i32 2) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.6) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.7 = call i32 @params3_32bitret(i32 1, i32 2, i32 3) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.7) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.8 = call i32 @params4_32bitret(i32 1, i32 2, 
i32 3, i32 4) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.8) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.9 = call i32 @params5_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.9) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.10 = call i32 @params6_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: {{.* r10, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.10) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.11 = call i32 @params7_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7) + ; CHECK: {{swi? .*, r1, 28}} + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: {{.* r10, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.11) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.12 = call i32 @params8_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7, i32 8) + ; CHECK: {{swi? .*, r1, 28}} + ; CHECK: {{swi? 
.*, r1, 32}} + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: {{.* r10, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.12) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.13 = call i32 @params9_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7, i32 8, i32 9) + ; CHECK: {{swi? .*, r1, 28}} + ; CHECK: {{swi? .*, r1, 32}} + ; CHECK: {{swi? .*, r1, 36}} + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: {{.* r10, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.13) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.14 = call i32 @params10_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7, i32 8, i32 9, i32 10) + ; CHECK: {{swi? .*, r1, 28}} + ; CHECK: {{swi? .*, r1, 32}} + ; CHECK: {{swi? .*, r1, 36}} + ; CHECK: {{swi? 
.*, r1, 40}} + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: {{.* r10, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.14) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + ret void +} diff --git a/test/CodeGen/MBlaze/dg.exp b/test/CodeGen/MBlaze/dg.exp new file mode 100644 index 0000000000..bfd5e47157 --- /dev/null +++ b/test/CodeGen/MBlaze/dg.exp @@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target MBlaze] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +} diff --git a/test/CodeGen/MBlaze/div.ll b/test/CodeGen/MBlaze/div.ll new file mode 100644 index 0000000000..fae9830619 --- /dev/null +++ b/test/CodeGen/MBlaze/div.ll @@ -0,0 +1,75 @@ +; Ensure that multiplication is lowered to function calls when the multiplier +; unit is not available in the hardware and that function calls are not used +; when the multiplier unit is available in the hardware. 
+; +; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s +; RUN: llc < %s -march=mblaze -mattr=+div | FileCheck -check-prefix=DIV %s + +define i8 @test_i8(i8 %a, i8 %b) { + ; FUN: test_i8: + ; DIV: test_i8: + + %tmp.1 = udiv i8 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV: idivu + + %tmp.2 = sdiv i8 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV-NOT: idivu + ; DIV: idiv + + %tmp.3 = add i8 %tmp.1, %tmp.2 + ret i8 %tmp.3 + ; FUN: rtsd + ; DIV: rtsd +} + +define i16 @test_i16(i16 %a, i16 %b) { + ; FUN: test_i16: + ; DIV: test_i16: + + %tmp.1 = udiv i16 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV: idivu + + %tmp.2 = sdiv i16 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV-NOT: idivu + ; DIV: idiv + + %tmp.3 = add i16 %tmp.1, %tmp.2 + ret i16 %tmp.3 + ; FUN: rtsd + ; DIV: rtsd +} + +define i32 @test_i32(i32 %a, i32 %b) { + ; FUN: test_i32: + ; DIV: test_i32: + + %tmp.1 = udiv i32 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV: idivu + + %tmp.2 = sdiv i32 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV-NOT: idivu + ; DIV: idiv + + %tmp.3 = add i32 %tmp.1, %tmp.2 + ret i32 %tmp.3 + ; FUN: rtsd + ; DIV: rtsd +} diff --git a/test/CodeGen/MBlaze/fpu.ll b/test/CodeGen/MBlaze/fpu.ll new file mode 100644 index 0000000000..83f4d83124 --- /dev/null +++ b/test/CodeGen/MBlaze/fpu.ll @@ -0,0 +1,66 @@ +; Ensure that floating point operations are lowered to function calls when the +; FPU is not available in the hardware and that function calls are not used +; when the FPU is available in the hardware. 
+; +; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s +; RUN: llc < %s -march=mblaze -mattr=+fpu | FileCheck -check-prefix=FPU %s + +define float @test_add(float %a, float %b) { + ; FUN: test_add: + ; FPU: test_add: + + %tmp.1 = fadd float %a, %b + ; FUN-NOT: fadd + ; FUN: brlid + ; FPU-NOT: brlid + ; FPU: fadd + + ret float %tmp.1 + ; FUN: rtsd + ; FPU: rtsd +} + +define float @test_sub(float %a, float %b) { + ; FUN: test_sub: + ; FPU: test_sub: + + %tmp.1 = fsub float %a, %b + ; FUN-NOT: frsub + ; FUN: brlid + ; FPU-NOT: brlid + ; FPU: frsub + + ret float %tmp.1 + ; FUN: rtsd + ; FPU: rtsd +} + +define float @test_mul(float %a, float %b) { + ; FUN: test_mul: + ; FPU: test_mul: + + %tmp.1 = fmul float %a, %b + ; FUN-NOT: fmul + ; FUN: brlid + ; FPU-NOT: brlid + ; FPU: fmul + + ret float %tmp.1 + ; FUN: rtsd + ; FPU: rtsd +} + +define float @test_div(float %a, float %b) { + ; FUN: test_div: + ; FPU: test_div: + + %tmp.1 = fdiv float %a, %b + ; FUN-NOT: fdiv + ; FUN: brlid + ; FPU-NOT: brlid + ; FPU: fdiv + + ret float %tmp.1 + ; FUN: rtsd + ; FPU: rtsd +} diff --git a/test/CodeGen/MBlaze/fsl.ll b/test/CodeGen/MBlaze/fsl.ll new file mode 100644 index 0000000000..f9c6205bc1 --- /dev/null +++ b/test/CodeGen/MBlaze/fsl.ll @@ -0,0 +1,323 @@ +; Ensure that the FSL instrinsic instruction generate single FSL instructions +; at the machine level. Additionally, ensure that dynamic values use the +; dynamic version of the instructions and that constant values use the +; constant version of the instructions. 
+; +; RUN: llc < %s -march=mblaze | FileCheck %s + +declare i32 @llvm.mblaze.fsl.get(i32 %port) +declare i32 @llvm.mblaze.fsl.aget(i32 %port) +declare i32 @llvm.mblaze.fsl.cget(i32 %port) +declare i32 @llvm.mblaze.fsl.caget(i32 %port) +declare i32 @llvm.mblaze.fsl.eget(i32 %port) +declare i32 @llvm.mblaze.fsl.eaget(i32 %port) +declare i32 @llvm.mblaze.fsl.ecget(i32 %port) +declare i32 @llvm.mblaze.fsl.ecaget(i32 %port) +declare i32 @llvm.mblaze.fsl.nget(i32 %port) +declare i32 @llvm.mblaze.fsl.naget(i32 %port) +declare i32 @llvm.mblaze.fsl.ncget(i32 %port) +declare i32 @llvm.mblaze.fsl.ncaget(i32 %port) +declare i32 @llvm.mblaze.fsl.neget(i32 %port) +declare i32 @llvm.mblaze.fsl.neaget(i32 %port) +declare i32 @llvm.mblaze.fsl.necget(i32 %port) +declare i32 @llvm.mblaze.fsl.necaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tget(i32 %port) +declare i32 @llvm.mblaze.fsl.taget(i32 %port) +declare i32 @llvm.mblaze.fsl.tcget(i32 %port) +declare i32 @llvm.mblaze.fsl.tcaget(i32 %port) +declare i32 @llvm.mblaze.fsl.teget(i32 %port) +declare i32 @llvm.mblaze.fsl.teaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tecget(i32 %port) +declare i32 @llvm.mblaze.fsl.tecaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tnget(i32 %port) +declare i32 @llvm.mblaze.fsl.tnaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tncget(i32 %port) +declare i32 @llvm.mblaze.fsl.tncaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tneget(i32 %port) +declare i32 @llvm.mblaze.fsl.tneaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tnecget(i32 %port) +declare i32 @llvm.mblaze.fsl.tnecaget(i32 %port) + +declare void @llvm.mblaze.fsl.put(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.aput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.cput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.caput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.nput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.naput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.ncput(i32 %value, i32 %port) +declare 
void @llvm.mblaze.fsl.ncaput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.tput(i32 %port) +declare void @llvm.mblaze.fsl.taput(i32 %port) +declare void @llvm.mblaze.fsl.tcput(i32 %port) +declare void @llvm.mblaze.fsl.tcaput(i32 %port) +declare void @llvm.mblaze.fsl.tnput(i32 %port) +declare void @llvm.mblaze.fsl.tnaput(i32 %port) +declare void @llvm.mblaze.fsl.tncput(i32 %port) +declare void @llvm.mblaze.fsl.tncaput(i32 %port) + +define i32 @fsl_get(i32 %port) +{ + ; CHECK: fsl_get: + %v0 = call i32 @llvm.mblaze.fsl.get(i32 %port) + ; CHECK: getd + %v1 = call i32 @llvm.mblaze.fsl.aget(i32 %port) + ; CHECK-NEXT: agetd + %v2 = call i32 @llvm.mblaze.fsl.cget(i32 %port) + ; CHECK-NEXT: cgetd + %v3 = call i32 @llvm.mblaze.fsl.caget(i32 %port) + ; CHECK-NEXT: cagetd + %v4 = call i32 @llvm.mblaze.fsl.eget(i32 %port) + ; CHECK-NEXT: egetd + %v5 = call i32 @llvm.mblaze.fsl.eaget(i32 %port) + ; CHECK-NEXT: eagetd + %v6 = call i32 @llvm.mblaze.fsl.ecget(i32 %port) + ; CHECK-NEXT: ecgetd + %v7 = call i32 @llvm.mblaze.fsl.ecaget(i32 %port) + ; CHECK-NEXT: ecagetd + %v8 = call i32 @llvm.mblaze.fsl.nget(i32 %port) + ; CHECK-NEXT: ngetd + %v9 = call i32 @llvm.mblaze.fsl.naget(i32 %port) + ; CHECK-NEXT: nagetd + %v10 = call i32 @llvm.mblaze.fsl.ncget(i32 %port) + ; CHECK-NEXT: ncgetd + %v11 = call i32 @llvm.mblaze.fsl.ncaget(i32 %port) + ; CHECK-NEXT: ncagetd + %v12 = call i32 @llvm.mblaze.fsl.neget(i32 %port) + ; CHECK-NEXT: negetd + %v13 = call i32 @llvm.mblaze.fsl.neaget(i32 %port) + ; CHECK-NEXT: neagetd + %v14 = call i32 @llvm.mblaze.fsl.necget(i32 %port) + ; CHECK-NEXT: necgetd + %v15 = call i32 @llvm.mblaze.fsl.necaget(i32 %port) + ; CHECK-NEXT: necagetd + %v16 = call i32 @llvm.mblaze.fsl.tget(i32 %port) + ; CHECK-NEXT: tgetd + %v17 = call i32 @llvm.mblaze.fsl.taget(i32 %port) + ; CHECK-NEXT: tagetd + %v18 = call i32 @llvm.mblaze.fsl.tcget(i32 %port) + ; CHECK-NEXT: tcgetd + %v19 = call i32 @llvm.mblaze.fsl.tcaget(i32 %port) + ; CHECK-NEXT: tcagetd + %v20 = call i32 
@llvm.mblaze.fsl.teget(i32 %port) + ; CHECK-NEXT: tegetd + %v21 = call i32 @llvm.mblaze.fsl.teaget(i32 %port) + ; CHECK-NEXT: teagetd + %v22 = call i32 @llvm.mblaze.fsl.tecget(i32 %port) + ; CHECK-NEXT: tecgetd + %v23 = call i32 @llvm.mblaze.fsl.tecaget(i32 %port) + ; CHECK-NEXT: tecagetd + %v24 = call i32 @llvm.mblaze.fsl.tnget(i32 %port) + ; CHECK-NEXT: tngetd + %v25 = call i32 @llvm.mblaze.fsl.tnaget(i32 %port) + ; CHECK-NEXT: tnagetd + %v26 = call i32 @llvm.mblaze.fsl.tncget(i32 %port) + ; CHECK-NEXT: tncgetd + %v27 = call i32 @llvm.mblaze.fsl.tncaget(i32 %port) + ; CHECK-NEXT: tncagetd + %v28 = call i32 @llvm.mblaze.fsl.tneget(i32 %port) + ; CHECK-NEXT: tnegetd + %v29 = call i32 @llvm.mblaze.fsl.tneaget(i32 %port) + ; CHECK-NEXT: tneagetd + %v30 = call i32 @llvm.mblaze.fsl.tnecget(i32 %port) + ; CHECK-NEXT: tnecgetd + %v31 = call i32 @llvm.mblaze.fsl.tnecaget(i32 %port) + ; CHECK-NEXT: tnecagetd + ret i32 1 + ; CHECK: rtsd +} + +define i32 @fslc_get() +{ + ; CHECK: fslc_get: + %v0 = call i32 @llvm.mblaze.fsl.get(i32 1) + ; CHECK: get + %v1 = call i32 @llvm.mblaze.fsl.aget(i32 1) + ; CHECK-NOT: agetd + ; CHECK: aget + %v2 = call i32 @llvm.mblaze.fsl.cget(i32 1) + ; CHECK-NOT: cgetd + ; CHECK: cget + %v3 = call i32 @llvm.mblaze.fsl.caget(i32 1) + ; CHECK-NOT: cagetd + ; CHECK: caget + %v4 = call i32 @llvm.mblaze.fsl.eget(i32 1) + ; CHECK-NOT: egetd + ; CHECK: eget + %v5 = call i32 @llvm.mblaze.fsl.eaget(i32 1) + ; CHECK-NOT: eagetd + ; CHECK: eaget + %v6 = call i32 @llvm.mblaze.fsl.ecget(i32 1) + ; CHECK-NOT: ecgetd + ; CHECK: ecget + %v7 = call i32 @llvm.mblaze.fsl.ecaget(i32 1) + ; CHECK-NOT: ecagetd + ; CHECK: ecaget + %v8 = call i32 @llvm.mblaze.fsl.nget(i32 1) + ; CHECK-NOT: ngetd + ; CHECK: nget + %v9 = call i32 @llvm.mblaze.fsl.naget(i32 1) + ; CHECK-NOT: nagetd + ; CHECK: naget + %v10 = call i32 @llvm.mblaze.fsl.ncget(i32 1) + ; CHECK-NOT: ncgetd + ; CHECK: ncget + %v11 = call i32 @llvm.mblaze.fsl.ncaget(i32 1) + ; CHECK-NOT: ncagetd + ; CHECK: ncaget + 
%v12 = call i32 @llvm.mblaze.fsl.neget(i32 1) + ; CHECK-NOT: negetd + ; CHECK: neget + %v13 = call i32 @llvm.mblaze.fsl.neaget(i32 1) + ; CHECK-NOT: neagetd + ; CHECK: neaget + %v14 = call i32 @llvm.mblaze.fsl.necget(i32 1) + ; CHECK-NOT: necgetd + ; CHECK: necget + %v15 = call i32 @llvm.mblaze.fsl.necaget(i32 1) + ; CHECK-NOT: necagetd + ; CHECK: necaget + %v16 = call i32 @llvm.mblaze.fsl.tget(i32 1) + ; CHECK-NOT: tgetd + ; CHECK: tget + %v17 = call i32 @llvm.mblaze.fsl.taget(i32 1) + ; CHECK-NOT: tagetd + ; CHECK: taget + %v18 = call i32 @llvm.mblaze.fsl.tcget(i32 1) + ; CHECK-NOT: tcgetd + ; CHECK: tcget + %v19 = call i32 @llvm.mblaze.fsl.tcaget(i32 1) + ; CHECK-NOT: tcagetd + ; CHECK: tcaget + %v20 = call i32 @llvm.mblaze.fsl.teget(i32 1) + ; CHECK-NOT: tegetd + ; CHECK: teget + %v21 = call i32 @llvm.mblaze.fsl.teaget(i32 1) + ; CHECK-NOT: teagetd + ; CHECK: teaget + %v22 = call i32 @llvm.mblaze.fsl.tecget(i32 1) + ; CHECK-NOT: tecgetd + ; CHECK: tecget + %v23 = call i32 @llvm.mblaze.fsl.tecaget(i32 1) + ; CHECK-NOT: tecagetd + ; CHECK: tecaget + %v24 = call i32 @llvm.mblaze.fsl.tnget(i32 1) + ; CHECK-NOT: tngetd + ; CHECK: tnget + %v25 = call i32 @llvm.mblaze.fsl.tnaget(i32 1) + ; CHECK-NOT: tnagetd + ; CHECK: tnaget + %v26 = call i32 @llvm.mblaze.fsl.tncget(i32 1) + ; CHECK-NOT: tncgetd + ; CHECK: tncget + %v27 = call i32 @llvm.mblaze.fsl.tncaget(i32 1) + ; CHECK-NOT: tncagetd + ; CHECK: tncaget + %v28 = call i32 @llvm.mblaze.fsl.tneget(i32 1) + ; CHECK-NOT: tnegetd + ; CHECK: tneget + %v29 = call i32 @llvm.mblaze.fsl.tneaget(i32 1) + ; CHECK-NOT: tneagetd + ; CHECK: tneaget + %v30 = call i32 @llvm.mblaze.fsl.tnecget(i32 1) + ; CHECK-NOT: tnecgetd + ; CHECK: tnecget + %v31 = call i32 @llvm.mblaze.fsl.tnecaget(i32 1) + ; CHECK-NOT: tnecagetd + ; CHECK: tnecaget + ret i32 1 + ; CHECK: rtsd +} + +define void @putfsl(i32 %value, i32 %port) +{ + ; CHECK: putfsl: + call void @llvm.mblaze.fsl.put(i32 %value, i32 %port) + ; CHECK: putd + call void 
@llvm.mblaze.fsl.aput(i32 %value, i32 %port) + ; CHECK-NEXT: aputd + call void @llvm.mblaze.fsl.cput(i32 %value, i32 %port) + ; CHECK-NEXT: cputd + call void @llvm.mblaze.fsl.caput(i32 %value, i32 %port) + ; CHECK-NEXT: caputd + call void @llvm.mblaze.fsl.nput(i32 %value, i32 %port) + ; CHECK-NEXT: nputd + call void @llvm.mblaze.fsl.naput(i32 %value, i32 %port) + ; CHECK-NEXT: naputd + call void @llvm.mblaze.fsl.ncput(i32 %value, i32 %port) + ; CHECK-NEXT: ncputd + call void @llvm.mblaze.fsl.ncaput(i32 %value, i32 %port) + ; CHECK-NEXT: ncaputd + call void @llvm.mblaze.fsl.tput(i32 %port) + ; CHECK-NEXT: tputd + call void @llvm.mblaze.fsl.taput(i32 %port) + ; CHECK-NEXT: taputd + call void @llvm.mblaze.fsl.tcput(i32 %port) + ; CHECK-NEXT: tcputd + call void @llvm.mblaze.fsl.tcaput(i32 %port) + ; CHECK-NEXT: tcaputd + call void @llvm.mblaze.fsl.tnput(i32 %port) + ; CHECK-NEXT: tnputd + call void @llvm.mblaze.fsl.tnaput(i32 %port) + ; CHECK-NEXT: tnaputd + call void @llvm.mblaze.fsl.tncput(i32 %port) + ; CHECK-NEXT: tncputd + call void @llvm.mblaze.fsl.tncaput(i32 %port) + ; CHECK-NEXT: tncaputd + ret void + ; CHECK: rtsd +} + +define void @putfsl_const(i32 %value) +{ + ; CHECK: putfsl_const: + call void @llvm.mblaze.fsl.put(i32 %value, i32 1) + ; CHECK-NOT: putd + ; CHECK: put + call void @llvm.mblaze.fsl.aput(i32 %value, i32 1) + ; CHECK-NOT: aputd + ; CHECK: aput + call void @llvm.mblaze.fsl.cput(i32 %value, i32 1) + ; CHECK-NOT: cputd + ; CHECK: cput + call void @llvm.mblaze.fsl.caput(i32 %value, i32 1) + ; CHECK-NOT: caputd + ; CHECK: caput + call void @llvm.mblaze.fsl.nput(i32 %value, i32 1) + ; CHECK-NOT: nputd + ; CHECK: nput + call void @llvm.mblaze.fsl.naput(i32 %value, i32 1) + ; CHECK-NOT: naputd + ; CHECK: naput + call void @llvm.mblaze.fsl.ncput(i32 %value, i32 1) + ; CHECK-NOT: ncputd + ; CHECK: ncput + call void @llvm.mblaze.fsl.ncaput(i32 %value, i32 1) + ; CHECK-NOT: ncaputd + ; CHECK: ncaput + call void @llvm.mblaze.fsl.tput(i32 1) + ; CHECK-NOT: 
tputd + ; CHECK: tput + call void @llvm.mblaze.fsl.taput(i32 1) + ; CHECK-NOT: taputd + ; CHECK: taput + call void @llvm.mblaze.fsl.tcput(i32 1) + ; CHECK-NOT: tcputd + ; CHECK: tcput + call void @llvm.mblaze.fsl.tcaput(i32 1) + ; CHECK-NOT: tcaputd + ; CHECK: tcaput + call void @llvm.mblaze.fsl.tnput(i32 1) + ; CHECK-NOT: tnputd + ; CHECK: tnput + call void @llvm.mblaze.fsl.tnaput(i32 1) + ; CHECK-NOT: tnaputd + ; CHECK: tnaput + call void @llvm.mblaze.fsl.tncput(i32 1) + ; CHECK-NOT: tncputd + ; CHECK: tncput + call void @llvm.mblaze.fsl.tncaput(i32 1) + ; CHECK-NOT: tncaputd + ; CHECK: tncaput + ret void + ; CHECK: rtsd +} diff --git a/test/CodeGen/MBlaze/imm.ll b/test/CodeGen/MBlaze/imm.ll new file mode 100644 index 0000000000..85fad175b7 --- /dev/null +++ b/test/CodeGen/MBlaze/imm.ll @@ -0,0 +1,70 @@ +; Ensure that all immediate values that are 32-bits or less can be loaded +; using a single instruction and that immediate values 64-bits or less can +; be loaded using two instructions. 
+; +; RUN: llc < %s -march=mblaze | FileCheck %s +; RUN: llc < %s -march=mblaze -mattr=+fpu | FileCheck -check-prefix=FPU %s + +define i8 @retimm_i8() { + ; CHECK: retimm_i8: + ; CHECK: add + ; CHECK-NEXT: rtsd + ; FPU: retimm_i8: + ; FPU: add + ; FPU-NEXT: rtsd + ret i8 123 +} + +define i16 @retimm_i16() { + ; CHECK: retimm_i16: + ; CHECK: add + ; CHECK-NEXT: rtsd + ; FPU: retimm_i16: + ; FPU: add + ; FPU-NEXT: rtsd + ret i16 38212 +} + +define i32 @retimm_i32() { + ; CHECK: retimm_i32: + ; CHECK: add + ; CHECK-NEXT: rtsd + ; FPU: retimm_i32: + ; FPU: add + ; FPU-NEXT: rtsd + ret i32 2938128 +} + +define i64 @retimm_i64() { + ; CHECK: retimm_i64: + ; CHECK: add + ; CHECK-NEXT: add + ; CHECK-NEXT: rtsd + ; FPU: retimm_i64: + ; FPU: add + ; FPU-NEXT: add + ; FPU-NEXT: rtsd + ret i64 94581823 +} + +define float @retimm_float() { + ; CHECK: retimm_float: + ; CHECK: add + ; CHECK-NEXT: rtsd + ; FPU: retimm_float: + ; FPU: or + ; FPU: rtsd + ret float 12.0 +} + +define double @retimm_double() { + ; CHECK: retimm_double: + ; CHECK: add + ; CHECK-NEXT: add + ; CHECK-NEXT: rtsd + ; FPU: retimm_double: + ; FPU: add + ; FPU-NEXT: add + ; FPU-NEXT: rtsd + ret double 598382.39283873 +} diff --git a/test/CodeGen/MBlaze/jumptable.ll b/test/CodeGen/MBlaze/jumptable.ll new file mode 100644 index 0000000000..3f27c12f19 --- /dev/null +++ b/test/CodeGen/MBlaze/jumptable.ll @@ -0,0 +1,79 @@ +; Ensure that jump tables can be handled by the mblaze backend. The +; jump table should be lowered to a "br" instruction using one of the +; available registers. 
+; +; RUN: llc < %s -march=mblaze | FileCheck %s + +define i32 @jmptable(i32 %arg) +{ + ; CHECK: jmptable: + switch i32 %arg, label %DEFAULT [ i32 0, label %L0 + i32 1, label %L1 + i32 2, label %L2 + i32 3, label %L3 + i32 4, label %L4 + i32 5, label %L5 + i32 6, label %L6 + i32 7, label %L7 + i32 8, label %L8 + i32 9, label %L9 ] + + ; CHECK: lw [[REG:r[0-9]*]] + ; CHECK: br [[REG]] +L0: + %var0 = add i32 %arg, 0 + br label %DONE + +L1: + %var1 = add i32 %arg, 1 + br label %DONE + +L2: + %var2 = add i32 %arg, 2 + br label %DONE + +L3: + %var3 = add i32 %arg, 3 + br label %DONE + +L4: + %var4 = add i32 %arg, 4 + br label %DONE + +L5: + %var5 = add i32 %arg, 5 + br label %DONE + +L6: + %var6 = add i32 %arg, 6 + br label %DONE + +L7: + %var7 = add i32 %arg, 7 + br label %DONE + +L8: + %var8 = add i32 %arg, 8 + br label %DONE + +L9: + %var9 = add i32 %arg, 9 + br label %DONE + +DEFAULT: + unreachable + +DONE: + %rval = phi i32 [ %var0, %L0 ], + [ %var1, %L1 ], + [ %var2, %L2 ], + [ %var3, %L3 ], + [ %var4, %L4 ], + [ %var5, %L5 ], + [ %var6, %L6 ], + [ %var7, %L7 ], + [ %var8, %L8 ], + [ %var9, %L9 ] + ret i32 %rval + ; CHECK: rtsd +} diff --git a/test/CodeGen/MBlaze/loop.ll b/test/CodeGen/MBlaze/loop.ll new file mode 100644 index 0000000000..b473020e66 --- /dev/null +++ b/test/CodeGen/MBlaze/loop.ll @@ -0,0 +1,47 @@ +; Test some complicated looping constructs to ensure that they +; compile successfully and that some sort of branching is used +; in the resulting code. +; +; RUN: llc < %s -march=mblaze -mattr=+mul,+fpu,+barrel | FileCheck %s + +declare i32 @printf(i8*, ...) 
+@MSG = internal constant [19 x i8] c"Message: %d %d %d\0A\00" + +define i32 @loop(i32 %a, i32 %b) +{ + ; CHECK: loop: +entry: + br label %loop_outer + +loop_outer: + %outer.0 = phi i32 [ 0, %entry ], [ %outer.2, %loop_outer_finish ] + br label %loop_inner + +loop_inner: + %inner.0 = phi i32 [ %a, %loop_outer ], [ %inner.3, %loop_inner_finish ] + %inner.1 = phi i32 [ %b, %loop_outer ], [ %inner.4, %loop_inner_finish ] + %inner.2 = phi i32 [ 0, %loop_outer ], [ %inner.5, %loop_inner_finish ] + %inner.3 = add i32 %inner.0, %inner.1 + %inner.4 = mul i32 %inner.2, 11 + br label %loop_inner_finish + +loop_inner_finish: + %inner.5 = add i32 %inner.2, 1 + ; CHECK: addi {{.*, 1}} + + call i32 (i8*,...)* @printf( i8* getelementptr([19 x i8]* @MSG,i32 0,i32 0), + i32 %inner.0, i32 %inner.1, i32 %inner.2 ) + ; CHECK: brlid + + %inner.6 = icmp eq i32 %inner.5, 100 + ; CHECK: cmp + + br i1 %inner.6, label %loop_inner, label %loop_outer_finish + ; CHECK: {{beq|bne}} + +loop_outer_finish: + %outer.1 = add i32 %outer.0, 1 + %outer.2 = urem i32 %outer.1, 1500 + br label %loop_outer + ; CHECK: br +} diff --git a/test/CodeGen/MBlaze/mul.ll b/test/CodeGen/MBlaze/mul.ll new file mode 100644 index 0000000000..65d3e22a3e --- /dev/null +++ b/test/CodeGen/MBlaze/mul.ll @@ -0,0 +1,51 @@ +; Ensure that multiplication is lowered to function calls when the multiplier +; unit is not available in the hardware and that function calls are not used +; when the multiplier unit is available in the hardware. 
+; +; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s +; RUN: llc < %s -march=mblaze -mattr=+mul | FileCheck -check-prefix=MUL %s + +define i8 @test_i8(i8 %a, i8 %b) { + ; FUN: test_i8: + ; MUL: test_i8: + + %tmp.1 = mul i8 %a, %b + ; FUN-NOT: mul + ; FUN: brlid + ; MUL-NOT: brlid + ; MUL: mul + + ret i8 %tmp.1 + ; FUN: rtsd + ; MUL: rtsd +} + +define i16 @test_i16(i16 %a, i16 %b) { + ; FUN: test_i16: + ; MUL: test_i16: + + %tmp.1 = mul i16 %a, %b + ; FUN-NOT: mul + ; FUN: brlid + ; MUL-NOT: brlid + ; MUL: mul + + ret i16 %tmp.1 + ; FUN: rtsd + ; MUL: rtsd +} + +define i32 @test_i32(i32 %a, i32 %b) { + ; FUN: test_i32: + ; MUL: test_i32: + + %tmp.1 = mul i32 %a, %b + ; FUN-NOT: mul + ; FUN: brlid + ; MUL-NOT: brlid + ; MUL: mul + + ret i32 %tmp.1 + ; FUN: rtsd + ; MUL: rtsd +} diff --git a/test/CodeGen/MBlaze/mul64.ll b/test/CodeGen/MBlaze/mul64.ll new file mode 100644 index 0000000000..e0ef4138af --- /dev/null +++ b/test/CodeGen/MBlaze/mul64.ll @@ -0,0 +1,23 @@ +; Ensure that multiplication is lowered to function calls when the 64-bit +; multiplier unit is not available in the hardware and that function calls +; are not used when the 64-bit multiplier unit is available in the hardware. +; +; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s +; RUN: llc < %s -march=mblaze -mattr=+mul,+mul64 | \ +; RUN: FileCheck -check-prefix=MUL %s + +define i64 @test_i64(i64 %a, i64 %b) { + ; FUN: test_i64: + ; MUL: test_i64: + + %tmp.1 = mul i64 %a, %b + ; FUN-NOT: mul + ; FUN: brlid + ; MUL-NOT: brlid + ; MUL: mulh + ; MUL: mul + + ret i64 %tmp.1 + ; FUN: rtsd + ; MUL: rtsd +} diff --git a/test/CodeGen/MBlaze/select.ll b/test/CodeGen/MBlaze/select.ll new file mode 100644 index 0000000000..47a88a1e3c --- /dev/null +++ b/test/CodeGen/MBlaze/select.ll @@ -0,0 +1,15 @@ +; Ensure that the select instruction is supported and is lowered to +; some sort of branch instruction. 
+; +; RUN: llc < %s -march=mblaze | FileCheck %s + +define i32 @testsel(i32 %a, i32 %b) +{ + ; CHECK: testsel: + %tmp.1 = icmp eq i32 %a, %b + ; CHECK: cmp + %tmp.2 = select i1 %tmp.1, i32 %a, i32 %b + ; CHECK: {{bne|beq}} + ret i32 %tmp.2 + ; CHECK: rtsd +} diff --git a/test/CodeGen/MBlaze/shift.ll b/test/CodeGen/MBlaze/shift.ll new file mode 100644 index 0000000000..186115ec19 --- /dev/null +++ b/test/CodeGen/MBlaze/shift.ll @@ -0,0 +1,117 @@ +; Ensure that shifts are lowered to loops when the barrel shifter unit is +; not available in the hardware and that loops are not used when the +; barrel shifter unit is available in the hardware. +; +; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s +; RUN: llc < %s -march=mblaze -mattr=+barrel | FileCheck -check-prefix=SHT %s + +define i8 @test_i8(i8 %a, i8 %b) { + ; FUN: test_i8: + ; SHT: test_i8: + + %tmp.1 = shl i8 %a, %b + ; FUN-NOT: bsll + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: andi + ; SHT-NOT: bnei + ; SHT: bsll + + ret i8 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd +} + +define i8 @testc_i8(i8 %a, i8 %b) { + ; FUN: testc_i8: + ; SHT: testc_i8: + + %tmp.1 = shl i8 %a, 5 + ; FUN-NOT: bsll + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: andi + ; SHT-NOT: add + ; SHT-NOT: bnei + ; SHT: bslli + + ret i8 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd +} + +define i16 @test_i16(i16 %a, i16 %b) { + ; FUN: test_i16: + ; SHT: test_i16: + + %tmp.1 = shl i16 %a, %b + ; FUN-NOT: bsll + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: andi + ; SHT-NOT: bnei + ; SHT: bsll + + ret i16 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd +} + +define i16 @testc_i16(i16 %a, i16 %b) { + ; FUN: testc_i16: + ; SHT: testc_i16: + + %tmp.1 = shl i16 %a, 5 + ; FUN-NOT: bsll + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: andi + ; SHT-NOT: add + ; SHT-NOT: bnei + ; SHT: bslli + + ret i16 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd +} + +define i32 @test_i32(i32 %a, i32 %b) { + ; FUN: test_i32: + ; SHT: test_i32: + + %tmp.1 = shl i32 %a, %b + ; 
FUN-NOT: bsll + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: andi + ; SHT-NOT: bnei + ; SHT: bsll + + ret i32 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd +} + +define i32 @testc_i32(i32 %a, i32 %b) { + ; FUN: testc_i32: + ; SHT: testc_i32: + + %tmp.1 = shl i32 %a, 5 + ; FUN-NOT: bsll + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: andi + ; SHT-NOT: add + ; SHT-NOT: bnei + ; SHT: bslli + + ret i32 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd +} diff --git a/test/CodeGen/MSP430/AddrMode-bis-rx.ll b/test/CodeGen/MSP430/AddrMode-bis-rx.ll index 3340494f6b..4f9a7248bb 100644 --- a/test/CodeGen/MSP430/AddrMode-bis-rx.ll +++ b/test/CodeGen/MSP430/AddrMode-bis-rx.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s +; RUN: llc < %s -march=msp430 | FileCheck %s target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16" target triple = "msp430-generic-generic" @@ -29,7 +29,7 @@ define i8 @am3(i8 %x, i16 %n) nounwind { ret i8 %3 } ; CHECK: am3: -; CHECK: bis.b &bar(r14), r15 +; CHECK: bis.b bar(r14), r15 define i16 @am4(i16 %x) nounwind { %1 = volatile load i16* inttoptr(i16 32 to i16*) @@ -70,5 +70,5 @@ define i8 @am7(i8 %x, i16 %n) nounwind { ret i8 %4 } ; CHECK: am7: -; CHECK: bis.b &duh+2(r14), r15 +; CHECK: bis.b duh+2(r14), r15 diff --git a/test/CodeGen/MSP430/AddrMode-bis-xr.ll b/test/CodeGen/MSP430/AddrMode-bis-xr.ll index ca79fb6d33..17ebd87368 100644 --- a/test/CodeGen/MSP430/AddrMode-bis-xr.ll +++ b/test/CodeGen/MSP430/AddrMode-bis-xr.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s +; RUN: llc < %s -march=msp430 | FileCheck %s target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:16" target triple = "msp430-generic-generic" @@ -32,7 +32,7 @@ define void @am3(i16 %i, i8 %x) nounwind { ret void } ; CHECK: am3: -; CHECK: bis.b r14, &bar(r15) +; CHECK: bis.b r14, bar(r15) define void @am4(i16 %x) nounwind { %1 = volatile load i16* inttoptr(i16 32 to i16*) @@ -77,5 +77,5 @@ define void @am7(i16 %n, i8 %x) nounwind 
{ ret void } ; CHECK: am7: -; CHECK: bis.b r14, &duh+2(r15) +; CHECK: bis.b r14, duh+2(r15) diff --git a/test/CodeGen/MSP430/AddrMode-mov-rx.ll b/test/CodeGen/MSP430/AddrMode-mov-rx.ll index 67cbb021c8..6676b88cd1 100644 --- a/test/CodeGen/MSP430/AddrMode-mov-rx.ll +++ b/test/CodeGen/MSP430/AddrMode-mov-rx.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s +; RUN: llc < %s -march=msp430 | FileCheck %s target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16" target triple = "msp430-generic-generic" @@ -26,7 +26,7 @@ define i8 @am3(i16 %n) nounwind { ret i8 %2 } ; CHECK: am3: -; CHECK: mov.b &bar(r15), r15 +; CHECK: mov.b bar(r15), r15 define i16 @am4() nounwind { %1 = volatile load i16* inttoptr(i16 32 to i16*) @@ -63,5 +63,5 @@ define i8 @am7(i16 %n) nounwind { ret i8 %3 } ; CHECK: am7: -; CHECK: mov.b &duh+2(r15), r15 +; CHECK: mov.b duh+2(r15), r15 diff --git a/test/CodeGen/MSP430/AddrMode-mov-xr.ll b/test/CodeGen/MSP430/AddrMode-mov-xr.ll index b8155d3a55..4b327b0578 100644 --- a/test/CodeGen/MSP430/AddrMode-mov-xr.ll +++ b/test/CodeGen/MSP430/AddrMode-mov-xr.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s +; RUN: llc < %s -march=msp430 | FileCheck %s target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16" target triple = "msp430-generic-generic" @@ -26,7 +26,7 @@ define void @am3(i16 %i, i8 %a) nounwind { ret void } ; CHECK: am3: -; CHECK: mov.b r14, &bar(r15) +; CHECK: mov.b r14, bar(r15) define void @am4(i16 %a) nounwind { volatile store i16 %a, i16* inttoptr(i16 32 to i16*) @@ -63,5 +63,5 @@ define void @am7(i16 %n, i8 %a) nounwind { ret void } ; CHECK: am7: -; CHECK: mov.b r14, &duh+2(r15) +; CHECK: mov.b r14, duh+2(r15) diff --git a/test/CodeGen/MSP430/Inst16mm.ll b/test/CodeGen/MSP430/Inst16mm.ll index 510afe3734..2337c2c0f2 100644 --- a/test/CodeGen/MSP430/Inst16mm.ll +++ b/test/CodeGen/MSP430/Inst16mm.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=msp430 < %s | FileCheck %s +; RUN: llc 
-march=msp430 -combiner-alias-analysis < %s | FileCheck %s target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" target triple = "msp430-generic-generic" @foo = common global i16 0, align 2 @@ -52,3 +52,18 @@ define void @xor() nounwind { ret void } +define i16 @mov2() nounwind { +entry: + %retval = alloca i16 ; <i16*> [#uses=3] + %x = alloca i32, align 2 ; <i32*> [#uses=1] + %y = alloca i32, align 2 ; <i32*> [#uses=1] + store i16 0, i16* %retval + %tmp = load i32* %y ; <i32> [#uses=1] + store i32 %tmp, i32* %x + store i16 0, i16* %retval + %0 = load i16* %retval ; <i16> [#uses=1] + ret i16 %0 +; CHECK: mov2: +; CHECK: mov.w 0(r1), 4(r1) +; CHECK: mov.w 2(r1), 6(r1) +} diff --git a/test/CodeGen/MSP430/Inst8rr.ll b/test/CodeGen/MSP430/Inst8rr.ll index 74feaae4eb..0f5fc12b62 100644 --- a/test/CodeGen/MSP430/Inst8rr.ll +++ b/test/CodeGen/MSP430/Inst8rr.ll @@ -10,7 +10,7 @@ define i8 @mov(i8 %a, i8 %b) nounwind { define i8 @add(i8 %a, i8 %b) nounwind { ; CHECK: add: -; CHECK: add.b r14, r15 +; CHECK: add.b r12, r15 %1 = add i8 %a, %b ret i8 %1 } diff --git a/test/CodeGen/MSP430/bit.ll b/test/CodeGen/MSP430/bit.ll index cd664a17bf..03d672bcbe 100644 --- a/test/CodeGen/MSP430/bit.ll +++ b/test/CodeGen/MSP430/bit.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s +; RUN: llc < %s -march=msp430 | FileCheck %s target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:32" target triple = "msp430-generic-generic" diff --git a/test/CodeGen/MSP430/setcc.ll b/test/CodeGen/MSP430/setcc.ll index 9db51cce73..c99b17e143 100644 --- a/test/CodeGen/MSP430/setcc.ll +++ b/test/CodeGen/MSP430/setcc.ll @@ -10,9 +10,9 @@ define i16 @sccweqand(i16 %a, i16 %b) nounwind { } ; CHECK: sccweqand: ; CHECK: bit.w r14, r15 -; CHECK-NEXT: mov.w r2, r15 -; CHECK-NEXT: and.w #1, r15 -; CHECK-NEXT: xor.w #1, r15 +; CHECK: mov.w r2, r15 +; CHECK: rra.w r15 +; CHECK: and.w #1, r15 define i16 @sccwneand(i16 %a, i16 %b) nounwind { %t1 = and i16 %a, %b @@ -22,8 +22,8 @@ 
define i16 @sccwneand(i16 %a, i16 %b) nounwind { } ; CHECK: sccwneand: ; CHECK: bit.w r14, r15 -; CHECK-NEXT: mov.w r2, r15 -; CHECK-NEXT: and.w #1, r15 +; CHECK: mov.w r2, r15 +; CHECK: and.w #1, r15 define i16 @sccwne(i16 %a, i16 %b) nounwind { %t1 = icmp ne i16 %a, %b @@ -32,9 +32,10 @@ define i16 @sccwne(i16 %a, i16 %b) nounwind { } ; CHECK:sccwne: ; CHECK: cmp.w r14, r15 -; CHECK-NEXT: mov.w r2, r15 -; CHECK-NEXT: rra.w r15 -; CHECK-NEXT: and.w #1, r15 +; CHECK: mov.w r2, r15 +; CHECK: rra.w r15 +; CHECK: and.w #1, r15 +; CHECK: xor.w #1, r15 define i16 @sccweq(i16 %a, i16 %b) nounwind { %t1 = icmp eq i16 %a, %b @@ -43,10 +44,9 @@ define i16 @sccweq(i16 %a, i16 %b) nounwind { } ; CHECK:sccweq: ; CHECK: cmp.w r14, r15 -; CHECK-NEXT: mov.w r2, r15 -; CHECK-NEXT: rra.w r15 -; CHECK-NEXT: and.w #1, r15 -; CHECK-NEXT: xor.w #1, r15 +; CHECK: mov.w r2, r15 +; CHECK: rra.w r15 +; CHECK: and.w #1, r15 define i16 @sccwugt(i16 %a, i16 %b) nounwind { %t1 = icmp ugt i16 %a, %b @@ -55,9 +55,9 @@ define i16 @sccwugt(i16 %a, i16 %b) nounwind { } ; CHECK:sccwugt: ; CHECK: cmp.w r15, r14 -; CHECK-NEXT: mov.w r2, r15 -; CHECK-NEXT: and.w #1, r15 -; CHECK-NEXT: xor.w #1, r15 +; CHECK: mov.w r2, r15 +; CHECK: and.w #1, r15 +; CHECK: xor.w #1, r15 define i16 @sccwuge(i16 %a, i16 %b) nounwind { %t1 = icmp uge i16 %a, %b @@ -66,8 +66,8 @@ define i16 @sccwuge(i16 %a, i16 %b) nounwind { } ; CHECK:sccwuge: ; CHECK: cmp.w r14, r15 -; CHECK-NEXT: mov.w r2, r15 -; CHECK-NEXT: and.w #1, r15 +; CHECK: mov.w r2, r15 +; CHECK: and.w #1, r15 define i16 @sccwult(i16 %a, i16 %b) nounwind { %t1 = icmp ult i16 %a, %b @@ -76,9 +76,9 @@ define i16 @sccwult(i16 %a, i16 %b) nounwind { } ; CHECK:sccwult: ; CHECK: cmp.w r14, r15 -; CHECK-NEXT: mov.w r2, r15 -; CHECK-NEXT: and.w #1, r15 -; CHECK-NEXT: xor.w #1, r15 +; CHECK: mov.w r2, r15 +; CHECK: and.w #1, r15 +; CHECK: xor.w #1, r15 define i16 @sccwule(i16 %a, i16 %b) nounwind { %t1 = icmp ule i16 %a, %b @@ -87,8 +87,8 @@ define i16 @sccwule(i16 %a, 
i16 %b) nounwind { } ; CHECK:sccwule: ; CHECK: cmp.w r15, r14 -; CHECK-NEXT: mov.w r2, r15 -; CHECK-NEXT: and.w #1, r15 +; CHECK: mov.w r2, r15 +; CHECK: and.w #1, r15 define i16 @sccwsgt(i16 %a, i16 %b) nounwind { %t1 = icmp sgt i16 %a, %b diff --git a/test/CodeGen/PIC16/C16-11.ll b/test/CodeGen/PIC16/C16-11.ll index e70092b11c..8a5a0ac11f 100644 --- a/test/CodeGen/PIC16/C16-11.ll +++ b/test/CodeGen/PIC16/C16-11.ll @@ -1,4 +1,7 @@ -;RUN: llc < %s -march=pic16 +; RUN: llc < %s -march=pic16 +; XFAIL: * +; This fails because PIC16 doesn't define a (xor reg, reg) pattern. +; @c612.auto.a.b = internal global i1 false ; <i1*> [#uses=2] @c612.auto.A.b = internal global i1 false ; <i1*> [#uses=2] diff --git a/test/CodeGen/PIC16/C16-15.ll b/test/CodeGen/PIC16/C16-15.ll index 2e1dc0c013..5ca2d4a9bd 100644 --- a/test/CodeGen/PIC16/C16-15.ll +++ b/test/CodeGen/PIC16/C16-15.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=pic16 | grep "extern @.lib.unordered.f32" | count 3 +; RUN: llc < %s -march=pic16 | grep "extern" | grep "@.lib.unordered.f32" | count 3 @pc = global i8* inttoptr (i64 160 to i8*), align 1 ; <i8**> [#uses=2] @aa = common global i16 0, align 1 ; <i16*> [#uses=0] diff --git a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll index d1d28ae15b..be28a9a454 100644 --- a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll +++ b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s | grep {subfc r3,r5,r4} -; RUN: llc < %s | grep {subfze r4,r2} -; RUN: llc < %s -regalloc=local | grep {subfc r2,r5,r4} +; RUN: llc < %s | grep {subfze r4,r6} +; RUN: llc < %s -regalloc=local | grep {subfc r6,r5,r4} ; RUN: llc < %s -regalloc=local | grep {subfze r3,r3} ; The first argument of subfc must not be the same as any other register. 
diff --git a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll index 5d09696933..50a02781fd 100644 --- a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll +++ b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 | FileCheck %s +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin10 -mcpu=g5 | FileCheck %s ; ModuleID = '<stdin>' target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" target triple = "powerpc-apple-darwin10.0" @@ -10,8 +10,8 @@ target triple = "powerpc-apple-darwin10.0" define void @foo(i32 %y) nounwind ssp { entry: ; CHECK: foo -; CHECK: add r2 -; CHECK: 0(r2) +; CHECK: add r4 +; CHECK: 0(r4) %y_addr = alloca i32 ; <i32*> [#uses=2] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] store i32 %y, i32* %y_addr diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll new file mode 100644 index 0000000000..b73382e6eb --- /dev/null +++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck %s +; ModuleID = 'hh.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" +target triple = "powerpc-apple-darwin9.6" +; This formerly used R0 for both the stack address and CR. 
+ +define void @foo() nounwind { +entry: +;CHECK: mfcr r2 +;CHECK: rlwinm r2, r2, 8, 0, 31 +;CHECK: lis r0, 1 +;CHECK: ori r0, r0, 34540 +;CHECK: stwx r2, r1, r0 + %x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1] + call void @bar(i8* %x1) nounwind + call void asm sideeffect "", "~{cr2}"() nounwind + br label %return + +return: ; preds = %entry +;CHECK: lis r0, 1 +;CHECK: ori r0, r0, 34540 +;CHECK: lwzx r2, r1, r0 +;CHECK: rlwinm r2, r2, 24, 0, 31 +;CHECK: mtcrf 32, r2 + ret void +} + +declare void @bar(i8*) diff --git a/test/CodeGen/PowerPC/2010-02-26-FoldFloats.ll b/test/CodeGen/PowerPC/2010-02-26-FoldFloats.ll new file mode 100644 index 0000000000..f43f5cae6e --- /dev/null +++ b/test/CodeGen/PowerPC/2010-02-26-FoldFloats.ll @@ -0,0 +1,433 @@ +; RUN: llc < %s -O3 | FileCheck %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" +target triple = "powerpc-apple-darwin9.6" + +; There should be no stfs spills +; CHECK: main: +; CHECK-NOT: stfs +; CHECK: .section + +@.str66 = external constant [3 x i8], align 4 ; <[3 x i8]*> [#uses=1] +@.str31 = external constant [6 x i8], align 4 ; <[6 x i8]*> [#uses=1] +@.str61 = external constant [21 x i8], align 4 ; <[21 x i8]*> [#uses=1] +@.str101 = external constant [61 x i8], align 4 ; <[61 x i8]*> [#uses=1] +@.str104 = external constant [31 x i8], align 4 ; <[31 x i8]*> [#uses=1] +@.str105 = external constant [45 x i8], align 4 ; <[45 x i8]*> [#uses=1] +@.str112 = external constant [38 x i8], align 4 ; <[38 x i8]*> [#uses=1] +@.str121 = external constant [36 x i8], align 4 ; <[36 x i8]*> [#uses=1] +@.str12293 = external constant [67 x i8], align 4 ; <[67 x i8]*> [#uses=1] +@.str123 = external constant [68 x i8], align 4 ; <[68 x i8]*> [#uses=1] +@.str124 = external constant [52 x i8], align 4 ; <[52 x i8]*> 
[#uses=1] +@.str125 = external constant [51 x i8], align 4 ; <[51 x i8]*> [#uses=1] + +define i32 @main(i32 %argc, i8** %argv) noreturn nounwind { +entry: + br i1 undef, label %bb4.i1, label %my_fopen.exit + +bb4.i1: ; preds = %entry + unreachable + +my_fopen.exit: ; preds = %entry + br i1 undef, label %bb.i, label %bb1.i + +bb.i: ; preds = %my_fopen.exit + unreachable + +bb1.i: ; preds = %my_fopen.exit + br label %bb134.i + +bb2.i: ; preds = %bb134.i + %0 = icmp eq i32 undef, 0 ; <i1> [#uses=1] + br i1 %0, label %bb20.i, label %bb21.i + +bb20.i: ; preds = %bb2.i + br label %bb134.i + +bb21.i: ; preds = %bb2.i + %1 = call i32 @strcmp(i8* undef, i8* getelementptr inbounds ([6 x i8]* @.str31, i32 0, i32 0)) nounwind readonly ; <i32> [#uses=0] + br i1 undef, label %bb30.i, label %bb31.i + +bb30.i: ; preds = %bb21.i + br label %bb134.i + +bb31.i: ; preds = %bb21.i + br i1 undef, label %bb41.i, label %bb44.i + +bb41.i: ; preds = %bb31.i + %2 = icmp slt i32 undef, %argc ; <i1> [#uses=1] + br i1 %2, label %bb1.i77.i, label %bb2.i78.i + +bb1.i77.i: ; preds = %bb41.i + %3 = load float* undef, align 4 ; <float> [#uses=2] + %4 = fcmp ugt float %3, 0.000000e+00 ; <i1> [#uses=1] + br i1 %4, label %bb43.i, label %bb42.i + +bb2.i78.i: ; preds = %bb41.i + unreachable + +bb42.i: ; preds = %bb1.i77.i + unreachable + +bb43.i: ; preds = %bb1.i77.i + br label %bb134.i + +bb44.i: ; preds = %bb31.i + br i1 undef, label %bb45.i, label %bb49.i + +bb45.i: ; preds = %bb44.i + %5 = icmp slt i32 undef, %argc ; <i1> [#uses=1] + br i1 %5, label %bb1.i72.i, label %bb2.i73.i + +bb1.i72.i: ; preds = %bb45.i + %6 = load float* undef, align 4 ; <float> [#uses=3] + %7 = fcmp ult float %6, 1.000000e+00 ; <i1> [#uses=1] + %or.cond.i = and i1 undef, %7 ; <i1> [#uses=1] + br i1 %or.cond.i, label %bb48.i, label %bb47.i + +bb2.i73.i: ; preds = %bb45.i + unreachable + +bb47.i: ; preds = %bb1.i72.i + unreachable + +bb48.i: ; preds = %bb1.i72.i + br label %bb134.i + +bb49.i: ; preds = %bb44.i + br i1 undef, 
label %bb50.i, label %bb53.i + +bb50.i: ; preds = %bb49.i + br i1 false, label %bb1.i67.i, label %bb2.i68.i + +bb1.i67.i: ; preds = %bb50.i + br i1 false, label %read_float_option.exit69.i, label %bb1.i67.bb2.i68_crit_edge.i + +bb1.i67.bb2.i68_crit_edge.i: ; preds = %bb1.i67.i + br label %bb2.i68.i + +bb2.i68.i: ; preds = %bb1.i67.bb2.i68_crit_edge.i, %bb50.i + unreachable + +read_float_option.exit69.i: ; preds = %bb1.i67.i + br i1 undef, label %bb52.i, label %bb51.i + +bb51.i: ; preds = %read_float_option.exit69.i + unreachable + +bb52.i: ; preds = %read_float_option.exit69.i + br label %bb134.i + +bb53.i: ; preds = %bb49.i + %8 = call i32 @strcmp(i8* undef, i8* getelementptr inbounds ([21 x i8]* @.str61, i32 0, i32 0)) nounwind readonly ; <i32> [#uses=0] + br i1 false, label %bb89.i, label %bb92.i + +bb89.i: ; preds = %bb53.i + br i1 undef, label %bb1.i27.i, label %bb2.i28.i + +bb1.i27.i: ; preds = %bb89.i + unreachable + +bb2.i28.i: ; preds = %bb89.i + unreachable + +bb92.i: ; preds = %bb53.i + br i1 undef, label %bb93.i, label %bb96.i + +bb93.i: ; preds = %bb92.i + br i1 undef, label %bb1.i22.i, label %bb2.i23.i + +bb1.i22.i: ; preds = %bb93.i + br i1 undef, label %bb95.i, label %bb94.i + +bb2.i23.i: ; preds = %bb93.i + unreachable + +bb94.i: ; preds = %bb1.i22.i + unreachable + +bb95.i: ; preds = %bb1.i22.i + br label %bb134.i + +bb96.i: ; preds = %bb92.i + br i1 undef, label %bb97.i, label %bb100.i + +bb97.i: ; preds = %bb96.i + %9 = icmp slt i32 undef, %argc ; <i1> [#uses=1] + br i1 %9, label %bb1.i17.i, label %bb2.i18.i + +bb1.i17.i: ; preds = %bb97.i + %10 = call i32 (i8*, i8*, ...)* @"\01_sscanf$LDBL128"(i8* undef, i8* getelementptr inbounds ([3 x i8]* @.str66, i32 0, i32 0), float* undef) nounwind ; <i32> [#uses=1] + %phitmp.i16.i = icmp eq i32 %10, 1 ; <i1> [#uses=1] + br i1 %phitmp.i16.i, label %read_float_option.exit19.i, label %bb1.i17.bb2.i18_crit_edge.i + +bb1.i17.bb2.i18_crit_edge.i: ; preds = %bb1.i17.i + br label %bb2.i18.i + +bb2.i18.i: ; preds 
= %bb1.i17.bb2.i18_crit_edge.i, %bb97.i + unreachable + +read_float_option.exit19.i: ; preds = %bb1.i17.i + br i1 false, label %bb99.i, label %bb98.i + +bb98.i: ; preds = %read_float_option.exit19.i + unreachable + +bb99.i: ; preds = %read_float_option.exit19.i + br label %bb134.i + +bb100.i: ; preds = %bb96.i + br i1 false, label %bb101.i, label %bb104.i + +bb101.i: ; preds = %bb100.i + br i1 false, label %bb1.i12.i, label %bb2.i13.i + +bb1.i12.i: ; preds = %bb101.i + br i1 undef, label %bb102.i, label %bb103.i + +bb2.i13.i: ; preds = %bb101.i + unreachable + +bb102.i: ; preds = %bb1.i12.i + unreachable + +bb103.i: ; preds = %bb1.i12.i + br label %bb134.i + +bb104.i: ; preds = %bb100.i + unreachable + +bb134.i: ; preds = %bb103.i, %bb99.i, %bb95.i, %bb52.i, %bb48.i, %bb43.i, %bb30.i, %bb20.i, %bb1.i + %annealing_sched.1.0 = phi float [ 1.000000e+01, %bb1.i ], [ %annealing_sched.1.0, %bb20.i ], [ 1.000000e+00, %bb30.i ], [ %annealing_sched.1.0, %bb43.i ], [ %annealing_sched.1.0, %bb48.i ], [ %annealing_sched.1.0, %bb52.i ], [ %annealing_sched.1.0, %bb95.i ], [ %annealing_sched.1.0, %bb99.i ], [ %annealing_sched.1.0, %bb103.i ] ; <float> [#uses=8] + %annealing_sched.2.0 = phi float [ 1.000000e+02, %bb1.i ], [ %annealing_sched.2.0, %bb20.i ], [ %annealing_sched.2.0, %bb30.i ], [ %3, %bb43.i ], [ %annealing_sched.2.0, %bb48.i ], [ %annealing_sched.2.0, %bb52.i ], [ %annealing_sched.2.0, %bb95.i ], [ %annealing_sched.2.0, %bb99.i ], [ %annealing_sched.2.0, %bb103.i ] ; <float> [#uses=8] + %annealing_sched.3.0 = phi float [ 0x3FE99999A0000000, %bb1.i ], [ %annealing_sched.3.0, %bb20.i ], [ %annealing_sched.3.0, %bb30.i ], [ %annealing_sched.3.0, %bb43.i ], [ %6, %bb48.i ], [ %annealing_sched.3.0, %bb52.i ], [ %annealing_sched.3.0, %bb95.i ], [ %annealing_sched.3.0, %bb99.i ], [ %annealing_sched.3.0, %bb103.i ] ; <float> [#uses=8] + %annealing_sched.4.0 = phi float [ 0x3F847AE140000000, %bb1.i ], [ %annealing_sched.4.0, %bb20.i ], [ %annealing_sched.4.0, %bb30.i ], [ 
%annealing_sched.4.0, %bb43.i ], [ %annealing_sched.4.0, %bb48.i ], [ 0.000000e+00, %bb52.i ], [ %annealing_sched.4.0, %bb95.i ], [ %annealing_sched.4.0, %bb99.i ], [ %annealing_sched.4.0, %bb103.i ] ; <float> [#uses=8] + %router_opts.0.0 = phi float [ 0.000000e+00, %bb1.i ], [ %router_opts.0.0, %bb20.i ], [ 1.000000e+04, %bb30.i ], [ %router_opts.0.0, %bb43.i ], [ %router_opts.0.0, %bb48.i ], [ %router_opts.0.0, %bb52.i ], [ %router_opts.0.0, %bb95.i ], [ %router_opts.0.0, %bb99.i ], [ %router_opts.0.0, %bb103.i ] ; <float> [#uses=8] + %router_opts.1.0 = phi float [ 5.000000e-01, %bb1.i ], [ %router_opts.1.0, %bb20.i ], [ 1.000000e+04, %bb30.i ], [ %router_opts.1.0, %bb43.i ], [ %router_opts.1.0, %bb48.i ], [ %router_opts.1.0, %bb52.i ], [ undef, %bb95.i ], [ %router_opts.1.0, %bb99.i ], [ %router_opts.1.0, %bb103.i ] ; <float> [#uses=7] + %router_opts.2.0 = phi float [ 1.500000e+00, %bb1.i ], [ %router_opts.2.0, %bb20.i ], [ %router_opts.2.0, %bb30.i ], [ %router_opts.2.0, %bb43.i ], [ %router_opts.2.0, %bb48.i ], [ %router_opts.2.0, %bb52.i ], [ %router_opts.2.0, %bb95.i ], [ undef, %bb99.i ], [ %router_opts.2.0, %bb103.i ] ; <float> [#uses=8] + %router_opts.3.0 = phi float [ 0x3FC99999A0000000, %bb1.i ], [ %router_opts.3.0, %bb20.i ], [ %router_opts.3.0, %bb30.i ], [ %router_opts.3.0, %bb43.i ], [ %router_opts.3.0, %bb48.i ], [ %router_opts.3.0, %bb52.i ], [ %router_opts.3.0, %bb95.i ], [ %router_opts.3.0, %bb99.i ], [ 0.000000e+00, %bb103.i ] ; <float> [#uses=8] + %11 = phi float [ 0x3FC99999A0000000, %bb1.i ], [ %11, %bb20.i ], [ %11, %bb30.i ], [ %11, %bb43.i ], [ %11, %bb48.i ], [ %11, %bb52.i ], [ %11, %bb95.i ], [ %11, %bb99.i ], [ 0.000000e+00, %bb103.i ] ; <float> [#uses=8] + %12 = phi float [ 1.500000e+00, %bb1.i ], [ %12, %bb20.i ], [ %12, %bb30.i ], [ %12, %bb43.i ], [ %12, %bb48.i ], [ %12, %bb52.i ], [ %12, %bb95.i ], [ undef, %bb99.i ], [ %12, %bb103.i ] ; <float> [#uses=8] + %13 = phi float [ 5.000000e-01, %bb1.i ], [ %13, %bb20.i ], [ 
1.000000e+04, %bb30.i ], [ %13, %bb43.i ], [ %13, %bb48.i ], [ %13, %bb52.i ], [ undef, %bb95.i ], [ %13, %bb99.i ], [ %13, %bb103.i ] ; <float> [#uses=7] + %14 = phi float [ 0.000000e+00, %bb1.i ], [ %14, %bb20.i ], [ 1.000000e+04, %bb30.i ], [ %14, %bb43.i ], [ %14, %bb48.i ], [ %14, %bb52.i ], [ %14, %bb95.i ], [ %14, %bb99.i ], [ %14, %bb103.i ] ; <float> [#uses=8] + %15 = phi float [ 0x3FE99999A0000000, %bb1.i ], [ %15, %bb20.i ], [ %15, %bb30.i ], [ %15, %bb43.i ], [ %6, %bb48.i ], [ %15, %bb52.i ], [ %15, %bb95.i ], [ %15, %bb99.i ], [ %15, %bb103.i ] ; <float> [#uses=8] + %16 = phi float [ 0x3F847AE140000000, %bb1.i ], [ %16, %bb20.i ], [ %16, %bb30.i ], [ %16, %bb43.i ], [ %16, %bb48.i ], [ 0.000000e+00, %bb52.i ], [ %16, %bb95.i ], [ %16, %bb99.i ], [ %16, %bb103.i ] ; <float> [#uses=8] + %17 = phi float [ 1.000000e+01, %bb1.i ], [ %17, %bb20.i ], [ 1.000000e+00, %bb30.i ], [ %17, %bb43.i ], [ %17, %bb48.i ], [ %17, %bb52.i ], [ %17, %bb95.i ], [ %17, %bb99.i ], [ %17, %bb103.i ] ; <float> [#uses=8] + %18 = icmp slt i32 undef, %argc ; <i1> [#uses=1] + br i1 %18, label %bb2.i, label %bb135.i + +bb135.i: ; preds = %bb134.i + br i1 undef, label %bb141.i, label %bb142.i + +bb141.i: ; preds = %bb135.i + unreachable + +bb142.i: ; preds = %bb135.i + br i1 undef, label %bb145.i, label %bb144.i + +bb144.i: ; preds = %bb142.i + unreachable + +bb145.i: ; preds = %bb142.i + br i1 undef, label %bb146.i, label %bb147.i + +bb146.i: ; preds = %bb145.i + unreachable + +bb147.i: ; preds = %bb145.i + br i1 undef, label %bb148.i, label %bb155.i + +bb148.i: ; preds = %bb147.i + br label %bb155.i + +bb155.i: ; preds = %bb148.i, %bb147.i + br i1 undef, label %bb156.i, label %bb161.i + +bb156.i: ; preds = %bb155.i + unreachable + +bb161.i: ; preds = %bb155.i + br i1 undef, label %bb162.i, label %bb163.i + +bb162.i: ; preds = %bb161.i + %19 = fpext float %17 to double ; <double> [#uses=1] + %20 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([61 x i8]* 
@.str101, i32 0, i32 0), double %19) nounwind ; <i32> [#uses=0] + unreachable + +bb163.i: ; preds = %bb161.i + %21 = fpext float %16 to double ; <double> [#uses=1] + %22 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([31 x i8]* @.str104, i32 0, i32 0), double %21) nounwind ; <i32> [#uses=0] + %23 = fpext float %15 to double ; <double> [#uses=1] + %24 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([45 x i8]* @.str105, i32 0, i32 0), double %23) nounwind ; <i32> [#uses=0] + %25 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([38 x i8]* @.str112, i32 0, i32 0), double undef) nounwind ; <i32> [#uses=0] + br i1 undef, label %parse_command.exit, label %bb176.i + +bb176.i: ; preds = %bb163.i + br i1 undef, label %bb177.i, label %bb178.i + +bb177.i: ; preds = %bb176.i + unreachable + +bb178.i: ; preds = %bb176.i + %26 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([36 x i8]* @.str121, i32 0, i32 0), double undef) nounwind ; <i32> [#uses=0] + %27 = fpext float %14 to double ; <double> [#uses=1] + %28 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([67 x i8]* @.str12293, i32 0, i32 0), double %27) nounwind ; <i32> [#uses=0] + %29 = fpext float %13 to double ; <double> [#uses=1] + %30 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([68 x i8]* @.str123, i32 0, i32 0), double %29) nounwind ; <i32> [#uses=0] + %31 = fpext float %12 to double ; <double> [#uses=1] + %32 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([52 x i8]* @.str124, i32 0, i32 0), double %31) nounwind ; <i32> [#uses=0] + %33 = fpext float %11 to double ; <double> [#uses=1] + %34 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([51 x i8]* @.str125, i32 0, i32 0), double %33) nounwind ; <i32> [#uses=0] + unreachable + +parse_command.exit: ; preds = %bb163.i + br i1 undef, label %bb4.i152.i, label %my_fopen.exit.i 
+ +bb4.i152.i: ; preds = %parse_command.exit + unreachable + +my_fopen.exit.i: ; preds = %parse_command.exit + br i1 undef, label %bb.i6.i99, label %bb49.preheader.i.i + +bb.i6.i99: ; preds = %my_fopen.exit.i + br i1 undef, label %bb3.i.i100, label %bb1.i8.i + +bb1.i8.i: ; preds = %bb.i6.i99 + unreachable + +bb3.i.i100: ; preds = %bb.i6.i99 + unreachable + +bb49.preheader.i.i: ; preds = %my_fopen.exit.i + br i1 undef, label %bb7.i11.i, label %bb50.i.i + +bb7.i11.i: ; preds = %bb49.preheader.i.i + unreachable + +bb50.i.i: ; preds = %bb49.preheader.i.i + br i1 undef, label %bb.i.i.i20.i, label %my_calloc.exit.i.i.i + +bb.i.i.i20.i: ; preds = %bb50.i.i + unreachable + +my_calloc.exit.i.i.i: ; preds = %bb50.i.i + br i1 undef, label %bb.i.i37.i.i, label %alloc_hash_table.exit.i21.i + +bb.i.i37.i.i: ; preds = %my_calloc.exit.i.i.i + unreachable + +alloc_hash_table.exit.i21.i: ; preds = %my_calloc.exit.i.i.i + br i1 undef, label %bb51.i.i, label %bb3.i23.i.i + +bb51.i.i: ; preds = %alloc_hash_table.exit.i21.i + unreachable + +bb3.i23.i.i: ; preds = %alloc_hash_table.exit.i21.i + br i1 undef, label %bb.i8.i.i, label %bb.nph.i.i + +bb.nph.i.i: ; preds = %bb3.i23.i.i + unreachable + +bb.i8.i.i: ; preds = %bb3.i.i34.i, %bb3.i23.i.i + br i1 undef, label %bb3.i.i34.i, label %bb1.i.i32.i + +bb1.i.i32.i: ; preds = %bb.i8.i.i + unreachable + +bb3.i.i34.i: ; preds = %bb.i8.i.i + br i1 undef, label %free_hash_table.exit.i.i, label %bb.i8.i.i + +free_hash_table.exit.i.i: ; preds = %bb3.i.i34.i + br i1 undef, label %check_netlist.exit.i, label %bb59.i.i + +bb59.i.i: ; preds = %free_hash_table.exit.i.i + unreachable + +check_netlist.exit.i: ; preds = %free_hash_table.exit.i.i + br label %bb.i.i3.i + +bb.i.i3.i: ; preds = %bb3.i.i4.i, %check_netlist.exit.i + br i1 false, label %bb3.i.i4.i, label %bb1.i.i.i122 + +bb1.i.i.i122: ; preds = %bb1.i.i.i122, %bb.i.i3.i + br i1 false, label %bb3.i.i4.i, label %bb1.i.i.i122 + +bb3.i.i4.i: ; preds = %bb1.i.i.i122, %bb.i.i3.i + br i1 undef, label 
%read_net.exit, label %bb.i.i3.i + +read_net.exit: ; preds = %bb3.i.i4.i + br i1 undef, label %bb.i44, label %bb3.i47 + +bb.i44: ; preds = %read_net.exit + unreachable + +bb3.i47: ; preds = %read_net.exit + br i1 false, label %bb9.i50, label %bb8.i49 + +bb8.i49: ; preds = %bb3.i47 + unreachable + +bb9.i50: ; preds = %bb3.i47 + br i1 undef, label %bb11.i51, label %bb12.i52 + +bb11.i51: ; preds = %bb9.i50 + unreachable + +bb12.i52: ; preds = %bb9.i50 + br i1 undef, label %bb.i.i53, label %my_malloc.exit.i54 + +bb.i.i53: ; preds = %bb12.i52 + unreachable + +my_malloc.exit.i54: ; preds = %bb12.i52 + br i1 undef, label %bb.i2.i55, label %my_malloc.exit3.i56 + +bb.i2.i55: ; preds = %my_malloc.exit.i54 + unreachable + +my_malloc.exit3.i56: ; preds = %my_malloc.exit.i54 + br i1 undef, label %bb.i.i.i57, label %my_malloc.exit.i.i + +bb.i.i.i57: ; preds = %my_malloc.exit3.i56 + unreachable + +my_malloc.exit.i.i: ; preds = %my_malloc.exit3.i56 + br i1 undef, label %bb, label %bb10 + +bb: ; preds = %my_malloc.exit.i.i + unreachable + +bb10: ; preds = %my_malloc.exit.i.i + br i1 false, label %bb12, label %bb11 + +bb11: ; preds = %bb10 + unreachable + +bb12: ; preds = %bb10 + store float %annealing_sched.1.0, float* null, align 4 + store float %annealing_sched.2.0, float* undef, align 8 + store float %annealing_sched.3.0, float* undef, align 4 + store float %annealing_sched.4.0, float* undef, align 8 + store float %router_opts.0.0, float* undef, align 8 + store float %router_opts.1.0, float* undef, align 4 + store float %router_opts.2.0, float* null, align 8 + store float %router_opts.3.0, float* undef, align 4 + br i1 undef, label %place_and_route.exit, label %bb7.i22 + +bb7.i22: ; preds = %bb12 + br i1 false, label %bb8.i23, label %bb9.i26 + +bb8.i23: ; preds = %bb7.i22 + unreachable + +bb9.i26: ; preds = %bb7.i22 + unreachable + +place_and_route.exit: ; preds = %bb12 + unreachable +} + +declare i32 @"\01_printf$LDBL128"(i8*, ...) 
nounwind + +declare i32 @strcmp(i8* nocapture, i8* nocapture) nounwind readonly + +declare i32 @"\01_sscanf$LDBL128"(i8*, i8*, ...) nounwind diff --git a/test/CodeGen/PowerPC/Frames-alloca.ll b/test/CodeGen/PowerPC/Frames-alloca.ll index aed4fdbb2d..466ae80341 100644 --- a/test/CodeGen/PowerPC/Frames-alloca.ll +++ b/test/CodeGen/PowerPC/Frames-alloca.ll @@ -24,7 +24,7 @@ ; CHECK-PPC64-NOFP: ld r1, 0(r1) ; CHECK-PPC64-NOFP: ld r31, -8(r1) -define i32* @f1(i32 %n) { +define i32* @f1(i32 %n) nounwind { %tmp = alloca i32, i32 %n ; <i32*> [#uses=1] ret i32* %tmp } diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll index 0f7acacbfa..b10a996867 100644 --- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll +++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll @@ -3,14 +3,14 @@ ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \ ; RUN: grep {stw r3, 32751} ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \ -; RUN: grep {std r2, 9024} +; RUN: grep {std r3, 9024} -define void @test() { +define void @test() nounwind { store i32 0, i32* inttoptr (i64 48725999 to i32*) ret void } -define void @test2() { +define void @test2() nounwind { store i64 0, i64* inttoptr (i64 74560 to i64*) ret void } diff --git a/test/CodeGen/PowerPC/addc.ll b/test/CodeGen/PowerPC/addc.ll index 09a7fbd7a6..8c928ce8bc 100644 --- a/test/CodeGen/PowerPC/addc.ll +++ b/test/CodeGen/PowerPC/addc.ll @@ -1,26 +1,33 @@ ; All of these should be codegen'd without loading immediates -; RUN: llc < %s -march=ppc32 -o %t -; RUN: grep addc %t | count 1 -; RUN: grep adde %t | count 1 -; RUN: grep addze %t | count 1 -; RUN: grep addme %t | count 1 -; RUN: grep addic %t | count 2 +; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck %s -define i64 @add_ll(i64 %a, i64 %b) { +define i64 @add_ll(i64 %a, i64 %b) nounwind { entry: %tmp.2 = add i64 %b, %a ; <i64> [#uses=1] ret i64 %tmp.2 +; CHECK: add_ll: +; CHECK: addc r4, r6, r4 +; CHECK: adde r3, r5, r3 +; CHECK: 
blr } -define i64 @add_l_5(i64 %a) { +define i64 @add_l_5(i64 %a) nounwind { entry: %tmp.1 = add i64 %a, 5 ; <i64> [#uses=1] ret i64 %tmp.1 +; CHECK: add_l_5: +; CHECK: addic r4, r4, 5 +; CHECK: addze r3, r3 +; CHECK: blr } -define i64 @add_l_m5(i64 %a) { +define i64 @add_l_m5(i64 %a) nounwind { entry: %tmp.1 = add i64 %a, -5 ; <i64> [#uses=1] ret i64 %tmp.1 +; CHECK: add_l_m5: +; CHECK: addic r4, r4, -5 +; CHECK: addme r3, r3 +; CHECK: blr } diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll index fbc7bd2264..2094e10a58 100644 --- a/test/CodeGen/PowerPC/indirectbr.ll +++ b/test/CodeGen/PowerPC/indirectbr.ll @@ -43,13 +43,13 @@ L2: ; preds = %L3, %bb2 L1: ; preds = %L2, %bb2 %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i32> [#uses=1] -; PIC: addis r4, r2, ha16(L_BA4__foo_L5-"L1$pb") -; PIC: li r5, lo16(L_BA4__foo_L5-"L1$pb") -; PIC: add r4, r4, r5 -; PIC: stw r4 -; STATIC: li r2, lo16(L_BA4__foo_L5) -; STATIC: addis r2, r2, ha16(L_BA4__foo_L5) -; STATIC: stw r2 +; PIC: addis r5, r4, ha16(L_BA4__foo_L5-"L1$pb") +; PIC: li r6, lo16(L_BA4__foo_L5-"L1$pb") +; PIC: add r5, r5, r6 +; PIC: stw r5 +; STATIC: li r4, lo16(L_BA4__foo_L5) +; STATIC: addis r4, r4, ha16(L_BA4__foo_L5) +; STATIC: stw r4 store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4 ret i32 %res.3 } diff --git a/test/CodeGen/PowerPC/lsr-postinc-pos.ll b/test/CodeGen/PowerPC/lsr-postinc-pos.ll new file mode 100644 index 0000000000..f441e42da2 --- /dev/null +++ b/test/CodeGen/PowerPC/lsr-postinc-pos.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -print-lsr-output |& FileCheck %s + +; The icmp is a post-inc use, and the increment is in %bb11, but the +; scevgep needs to be inserted in %bb so that it is dominated by %t. 
+ +; CHECK: %t = load i8** undef +; CHECK: %scevgep = getelementptr i8* %t, i32 %lsr.iv.next +; CHECK: %c1 = icmp ult i8* %scevgep, undef + +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" +target triple = "powerpc-apple-darwin9" + +define void @foo() nounwind { +entry: + br label %bb11 + +bb11: + %i = phi i32 [ 0, %entry ], [ %i.next, %bb ] ; <i32> [#uses=3] + %ii = shl i32 %i, 2 ; <i32> [#uses=1] + %c0 = icmp eq i32 %i, undef ; <i1> [#uses=1] + br i1 %c0, label %bb13, label %bb + +bb: + %t = load i8** undef, align 16 ; <i8*> [#uses=1] + %p = getelementptr i8* %t, i32 %ii ; <i8*> [#uses=1] + %c1 = icmp ult i8* %p, undef ; <i1> [#uses=1] + %i.next = add i32 %i, 1 ; <i32> [#uses=1] + br i1 %c1, label %bb11, label %bb13 + +bb13: + unreachable +} diff --git a/test/CodeGen/PowerPC/mem_update.ll b/test/CodeGen/PowerPC/mem_update.ll index b267719421..17e7e2849c 100644 --- a/test/CodeGen/PowerPC/mem_update.ll +++ b/test/CodeGen/PowerPC/mem_update.ll @@ -3,66 +3,66 @@ ; RUN: llc < %s -march=ppc64 -enable-ppc-preinc | \ ; RUN: not grep addi -@Glob = global i64 4 ; <i64*> [#uses=2] +@Glob = global i64 4 -define i32* @test0(i32* %X, i32* %dest) { - %Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2] - %A = load i32* %Y ; <i32> [#uses=1] +define i32* @test0(i32* %X, i32* %dest) nounwind { + %Y = getelementptr i32* %X, i32 4 + %A = load i32* %Y store i32 %A, i32* %dest ret i32* %Y } -define i32* @test1(i32* %X, i32* %dest) { - %Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2] - %A = load i32* %Y ; <i32> [#uses=1] +define i32* @test1(i32* %X, i32* %dest) nounwind { + %Y = getelementptr i32* %X, i32 4 + %A = load i32* %Y store i32 %A, i32* %dest ret i32* %Y } -define i16* @test2(i16* %X, i32* %dest) { - %Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2] - %A = load i16* %Y ; <i16> [#uses=1] - %B = sext i16 %A to i32 ; <i32> [#uses=1] +define i16* @test2(i16* %X, i32* %dest) 
nounwind { + %Y = getelementptr i16* %X, i32 4 + %A = load i16* %Y + %B = sext i16 %A to i32 store i32 %B, i32* %dest ret i16* %Y } -define i16* @test3(i16* %X, i32* %dest) { - %Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2] - %A = load i16* %Y ; <i16> [#uses=1] - %B = zext i16 %A to i32 ; <i32> [#uses=1] +define i16* @test3(i16* %X, i32* %dest) nounwind { + %Y = getelementptr i16* %X, i32 4 + %A = load i16* %Y + %B = zext i16 %A to i32 store i32 %B, i32* %dest ret i16* %Y } -define i16* @test3a(i16* %X, i64* %dest) { - %Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2] - %A = load i16* %Y ; <i16> [#uses=1] - %B = sext i16 %A to i64 ; <i64> [#uses=1] +define i16* @test3a(i16* %X, i64* %dest) nounwind { + %Y = getelementptr i16* %X, i32 4 + %A = load i16* %Y + %B = sext i16 %A to i64 store i64 %B, i64* %dest ret i16* %Y } -define i64* @test4(i64* %X, i64* %dest) { - %Y = getelementptr i64* %X, i32 4 ; <i64*> [#uses=2] - %A = load i64* %Y ; <i64> [#uses=1] +define i64* @test4(i64* %X, i64* %dest) nounwind { + %Y = getelementptr i64* %X, i32 4 + %A = load i64* %Y store i64 %A, i64* %dest ret i64* %Y } -define i16* @test5(i16* %X) { - %Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2] +define i16* @test5(i16* %X) nounwind { + %Y = getelementptr i16* %X, i32 4 store i16 7, i16* %Y ret i16* %Y } -define i64* @test6(i64* %X, i64 %A) { - %Y = getelementptr i64* %X, i32 4 ; <i64*> [#uses=2] +define i64* @test6(i64* %X, i64 %A) nounwind { + %Y = getelementptr i64* %X, i32 4 store i64 %A, i64* %Y ret i64* %Y } -define i64* @test7(i64* %X, i64 %A) { +define i64* @test7(i64* %X, i64 %A) nounwind { store i64 %A, i64* @Glob ret i64* @Glob } diff --git a/test/CodeGen/PowerPC/retaddr.ll b/test/CodeGen/PowerPC/retaddr.ll index 9f8647d087..cf16b4c26f 100644 --- a/test/CodeGen/PowerPC/retaddr.ll +++ b/test/CodeGen/PowerPC/retaddr.ll @@ -4,7 +4,7 @@ target triple = "powerpc-apple-darwin8" -define void @foo(i8** %X) { +define void @foo(i8** %X) nounwind { entry: %tmp = tail 
call i8* @llvm.returnaddress( i32 0 ) ; <i8*> [#uses=1] store i8* %tmp, i8** %X, align 4 diff --git a/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll b/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll new file mode 100644 index 0000000000..363f5719d1 --- /dev/null +++ b/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll @@ -0,0 +1,76 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" + +define arm_apcscc i32 @test(i32 %n) nounwind { +; CHECK: test: +; CHECK-NOT: mov +; CHECK: return +entry: + %0 = icmp eq i32 %n, 1 ; <i1> [#uses=1] + br i1 %0, label %return, label %bb.nph + +bb.nph: ; preds = %entry + %tmp = add i32 %n, -1 ; <i32> [#uses=1] + br label %bb + +bb: ; preds = %bb.nph, %bb + %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=1] + %u.05 = phi i64 [ undef, %bb.nph ], [ %ins, %bb ] ; <i64> [#uses=1] + %1 = tail call arm_apcscc i32 @f() nounwind ; <i32> [#uses=1] + %tmp4 = zext i32 %1 to i64 ; <i64> [#uses=1] + %mask = and i64 %u.05, -4294967296 ; <i64> [#uses=1] + %ins = or i64 %tmp4, %mask ; <i64> [#uses=2] + tail call arm_apcscc void @g(i64 %ins) nounwind + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %tmp ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret i32 undef +} + +define arm_apcscc i32 @test_dead_cycle(i32 %n) nounwind { +; CHECK: test_dead_cycle: +; CHECK: blx +; CHECK-NOT: mov +; CHECK: blx +entry: + %0 = icmp eq i32 %n, 1 ; <i1> [#uses=1] + br i1 %0, label %return, label %bb.nph + +bb.nph: ; preds = %entry + %tmp = add i32 %n, -1 ; <i32> [#uses=2] + br label %bb + +bb: ; preds = %bb.nph, %bb2 + %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb2 ] ; <i32> [#uses=2] + %u.17 = phi i64 [ undef, %bb.nph ], [ %u.0, %bb2 ] ; <i64> [#uses=2] + %tmp9 = sub i32 %tmp, %indvar ; <i32> [#uses=1] + %1 = 
icmp sgt i32 %tmp9, 1 ; <i1> [#uses=1] + br i1 %1, label %bb1, label %bb2 + +bb1: ; preds = %bb + %2 = tail call arm_apcscc i32 @f() nounwind ; <i32> [#uses=1] + %tmp6 = zext i32 %2 to i64 ; <i64> [#uses=1] + %mask = and i64 %u.17, -4294967296 ; <i64> [#uses=1] + %ins = or i64 %tmp6, %mask ; <i64> [#uses=1] + tail call arm_apcscc void @g(i64 %ins) nounwind + br label %bb2 + +bb2: ; preds = %bb1, %bb +; also check for duplicate induction variables (radar 7645034) +; CHECK: subs r{{.*}}, #1 +; CHECK-NOT: subs r{{.*}}, #1 +; CHECK: pop + %u.0 = phi i64 [ %ins, %bb1 ], [ %u.17, %bb ] ; <i64> [#uses=2] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %tmp ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb2, %entry + ret i32 undef +} + +declare arm_apcscc i32 @f() + +declare arm_apcscc void @g(i64) diff --git a/test/CodeGen/Thumb2/2010-02-24-BigStack.ll b/test/CodeGen/Thumb2/2010-02-24-BigStack.ll new file mode 100644 index 0000000000..533546bb19 --- /dev/null +++ b/test/CodeGen/Thumb2/2010-02-24-BigStack.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -O0 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 -mattr=+vfp2 +; This test creates a big stack frame without spilling any callee-saved registers. +; Make sure the whole stack frame is addrerssable wiothout scavenger crashes. 
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin3.0.0-iphoneos" + +define arm_apcscc void @FindMin(double* %panelTDEL, i8* %dclOfRow, i32 %numRows, i32 %numCols, double* %retMin_RES_TDEL) { +entry: + %panelTDEL.addr = alloca double*, align 4 ; <double**> [#uses=1] + %panelResTDEL = alloca [2560 x double], align 4 ; <[2560 x double]*> [#uses=0] + store double* %panelTDEL, double** %panelTDEL.addr + store double* %retMin_RES_TDEL, double** undef + store i32 0, i32* undef + unreachable +} diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll index 8f6449e8ff..2b20931979 100644 --- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll +++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 7 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 3 define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind { entry: diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll index 47d85b1aa0..f007b5c697 100644 --- a/test/CodeGen/Thumb2/ldr-str-imm12.ll +++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll @@ -52,7 +52,7 @@ bb420: ; preds = %bb20, %bb20 ; CHECK: str r{{[0-7]}}, [sp] ; CHECK: str r{{[0-7]}}, [sp, #+4] ; CHECK: str r{{[0-7]}}, [sp, #+8] -; CHECK: str r{{[0-7]}}, [sp, #+24] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #+24] store %union.rec* null, %union.rec** @zz_hold, align 4 store %union.rec* null, %union.rec** @zz_res, align 4 store %union.rec* %x, %union.rec** @zz_hold, align 4 diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll index 7b1b57a786..ac2cd34e4b 100644 --- a/test/CodeGen/Thumb2/lsr-deficiency.ll +++ b/test/CodeGen/Thumb2/lsr-deficiency.ll @@ -1,25 +1,29 @@ ; RUN: llc < %s 
-mtriple=thumbv7-apple-darwin10 -relocation-model=pic | FileCheck %s ; rdar://7387640 -; FIXME: We still need to rewrite array reference iv of stride -4 with loop -; count iv of stride -1. +; This now reduces to a single induction variable. + +; TODO: It still gets a GPR shuffle at the end of the loop +; This is because something in instruction selection has decided +; that comparing the pre-incremented value with zero is better +; than comparing the post-incremented value with -4. @G = external global i32 ; <i32*> [#uses=2] @array = external global i32* ; <i32**> [#uses=1] define arm_apcscc void @t() nounwind optsize { ; CHECK: t: -; CHECK: mov.w r2, #4000 -; CHECK: movw r3, #1001 +; CHECK: mov.w r2, #1000 entry: %.pre = load i32* @G, align 4 ; <i32> [#uses=1] br label %bb bb: ; preds = %bb, %entry ; CHECK: LBB1_1: -; CHECK: subs r3, #1 -; CHECK: cmp r3, #0 -; CHECK: sub.w r2, r2, #4 +; CHECK: cmp r2, #0 +; CHECK: sub.w r9, r2, #1 +; CHECK: mov r2, r9 + %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1] %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] %tmp5 = sub i32 1000, %indvar ; <i32> [#uses=1] diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll index 71199abc57..1d267565e0 100644 --- a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll +++ b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s -define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { +define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK: t1: ; CHECK: it ne ; CHECK: cmpne @@ -20,12 +20,12 @@ cond_next: } ; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt. 
-define i32 @t2(i32 %a, i32 %b) { +define i32 @t2(i32 %a, i32 %b) nounwind { entry: ; CHECK: t2: -; CHECK: ite le -; CHECK: suble +; CHECK: ite gt ; CHECK: subgt +; CHECK: suble %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1] br i1 %tmp1434, label %bb17, label %bb.outer @@ -60,14 +60,14 @@ bb17: ; preds = %cond_false, %cond_true, %entry @x = external global i32* ; <i32**> [#uses=1] -define void @foo(i32 %a) { +define void @foo(i32 %a) nounwind { entry: %tmp = load i32** @x ; <i32*> [#uses=1] store i32 %a, i32* %tmp ret void } -define void @t3(i32 %a, i32 %b) { +define void @t3(i32 %a, i32 %b) nounwind { entry: ; CHECK: t3: ; CHECK: it lt diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll index 7935163761..ff178b42fb 100644 --- a/test/CodeGen/Thumb2/thumb2-spill-q.ll +++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll @@ -12,8 +12,8 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly define arm_apcscc void @aaa(%quuz* %this, i8* %block) { ; CHECK: aaa: ; CHECK: bic r4, r4, #15 -; CHECK: vst1.64 {{.*}}[r{{.*}}, :128] -; CHECK: vld1.64 {{.*}}[r{{.*}}, :128] +; CHECK: vst1.64 {{.*}}[{{.*}}, :128] +; CHECK: vld1.64 {{.*}}[{{.*}}, :128] entry: %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] store float 6.300000e+01, float* undef, align 4 diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll index 4e23f5356c..91598cdc96 100644 --- a/test/CodeGen/Thumb2/thumb2-uxtb.ll +++ b/test/CodeGen/Thumb2/thumb2-uxtb.ll @@ -2,14 +2,14 @@ define i32 @test1(i32 %x) { ; CHECK: test1 -; CHECK: uxtb16.w r0, r0 +; CHECK: uxtb16 r0, r0 %tmp1 = and i32 %x, 16711935 ; <i32> [#uses=1] ret i32 %tmp1 } define i32 @test2(i32 %x) { ; CHECK: test2 -; CHECK: uxtb16.w r0, r0, ror #8 +; CHECK: uxtb16 r0, r0, ror #8 %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1] %tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1] ret i32 %tmp2 @@ -17,7 +17,7 @@ define i32 @test2(i32 %x) { define i32 
@test3(i32 %x) { ; CHECK: test3 -; CHECK: uxtb16.w r0, r0, ror #8 +; CHECK: uxtb16 r0, r0, ror #8 %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1] %tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1] ret i32 %tmp2 @@ -25,7 +25,7 @@ define i32 @test3(i32 %x) { define i32 @test4(i32 %x) { ; CHECK: test4 -; CHECK: uxtb16.w r0, r0, ror #8 +; CHECK: uxtb16 r0, r0, ror #8 %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1] %tmp6 = and i32 %tmp1, 16711935 ; <i32> [#uses=1] ret i32 %tmp6 @@ -33,7 +33,7 @@ define i32 @test4(i32 %x) { define i32 @test5(i32 %x) { ; CHECK: test5 -; CHECK: uxtb16.w r0, r0, ror #8 +; CHECK: uxtb16 r0, r0, ror #8 %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1] %tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1] ret i32 %tmp2 @@ -41,7 +41,7 @@ define i32 @test5(i32 %x) { define i32 @test6(i32 %x) { ; CHECK: test6 -; CHECK: uxtb16.w r0, r0, ror #16 +; CHECK: uxtb16 r0, r0, ror #16 %tmp1 = lshr i32 %x, 16 ; <i32> [#uses=1] %tmp2 = and i32 %tmp1, 255 ; <i32> [#uses=1] %tmp4 = shl i32 %x, 16 ; <i32> [#uses=1] @@ -52,7 +52,7 @@ define i32 @test6(i32 %x) { define i32 @test7(i32 %x) { ; CHECK: test7 -; CHECK: uxtb16.w r0, r0, ror #16 +; CHECK: uxtb16 r0, r0, ror #16 %tmp1 = lshr i32 %x, 16 ; <i32> [#uses=1] %tmp2 = and i32 %tmp1, 255 ; <i32> [#uses=1] %tmp4 = shl i32 %x, 16 ; <i32> [#uses=1] @@ -63,7 +63,7 @@ define i32 @test7(i32 %x) { define i32 @test8(i32 %x) { ; CHECK: test8 -; CHECK: uxtb16.w r0, r0, ror #24 +; CHECK: uxtb16 r0, r0, ror #24 %tmp1 = shl i32 %x, 8 ; <i32> [#uses=1] %tmp2 = and i32 %tmp1, 16711680 ; <i32> [#uses=1] %tmp5 = lshr i32 %x, 24 ; <i32> [#uses=1] @@ -73,7 +73,7 @@ define i32 @test8(i32 %x) { define i32 @test9(i32 %x) { ; CHECK: test9 -; CHECK: uxtb16.w r0, r0, ror #24 +; CHECK: uxtb16 r0, r0, ror #24 %tmp1 = lshr i32 %x, 24 ; <i32> [#uses=1] %tmp4 = shl i32 %x, 8 ; <i32> [#uses=1] %tmp5 = and i32 %tmp4, 16711680 ; <i32> [#uses=1] @@ -86,7 +86,7 @@ define i32 @test10(i32 %p0) { ; CHECK: mov.w r1, #16253176 ; CHECK: and.w r0, r1, r0, lsr #7 ; CHECK: lsrs r1, 
r0, #5 -; CHECK: uxtb16.w r1, r1 +; CHECK: uxtb16 r1, r1 ; CHECK: orr.w r0, r1, r0 %tmp1 = lshr i32 %p0, 7 ; <i32> [#uses=1] diff --git a/test/CodeGen/X86/2005-01-17-CycleInDAG.ll b/test/CodeGen/X86/2005-01-17-CycleInDAG.ll index 32fafc61e8..fe6674da04 100644 --- a/test/CodeGen/X86/2005-01-17-CycleInDAG.ll +++ b/test/CodeGen/X86/2005-01-17-CycleInDAG.ll @@ -7,7 +7,7 @@ @GLOBAL = external global i32 ; <i32*> [#uses=1] -define i32 @test(i32* %P1, i32* %P2, i32* %P3) { +define i32 @test(i32* %P1, i32* %P2, i32* %P3) nounwind { %L = load i32* @GLOBAL ; <i32> [#uses=1] store i32 12, i32* %P2 %Y = load i32* %P3 ; <i32> [#uses=1] diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll index bdbe713a29..56d6aa960e 100644 --- a/test/CodeGen/X86/2006-05-11-InstrSched.ll +++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\ -; RUN: grep {asm-printer} | grep 31 +; RUN: grep {asm-printer} | grep 34 target datalayout = "e-p:32:32" define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind { @@ -40,7 +40,7 @@ cond_true: ; preds = %cond_true, %entry %tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1] store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7 %tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1] - %tmp.upgrd.8 = icmp slt i32 %tmp147, %M ; <i1> [#uses=1] + %tmp.upgrd.8 = icmp ne i32 %tmp147, %M ; <i1> [#uses=1] %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] br i1 %tmp.upgrd.8, label %cond_true, label %return diff --git a/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll b/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll index bf9fa5782b..d09d061476 100644 --- a/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll +++ b/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll @@ -5,7 +5,7 @@ target datalayout = "e-p:32:32" target triple = 
"i686-apple-darwin8.7.2" -define <4 x float> @test(<4 x float> %A, <4 x float>* %B) { +define <4 x float> @test(<4 x float> %A, <4 x float>* %B) nounwind { %BV = load <4 x float>* %B ; <<4 x float>> [#uses=1] %tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %A, <4 x float> %BV ) ; <<4 x float>> [#uses=1] ret <4 x float> %tmp28 diff --git a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll index 4cac9b4c4a..e1f890192d 100644 --- a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll +++ b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86 -mtriple=i686-darwin | \ ; RUN: grep push | count 3 -define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) { +define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) nounwind { entry: icmp sgt i32 %size, 0 ; <i1>:0 [#uses=1] br i1 %0, label %bb.preheader, label %return diff --git a/test/CodeGen/X86/2007-10-05-3AddrConvert.ll b/test/CodeGen/X86/2007-10-05-3AddrConvert.ll index 67323e87ef..2c2706de5d 100644 --- a/test/CodeGen/X86/2007-10-05-3AddrConvert.ll +++ b/test/CodeGen/X86/2007-10-05-3AddrConvert.ll @@ -36,7 +36,9 @@ bb.i6.i: ; preds = %bb.i6.i, %stepsystem.exit.i bb107.i.i: ; preds = %bb107.i.i, %bb.i6.i %q_addr.0.i.i.in = phi %struct.bnode** [ null, %bb107.i.i ], [ %4, %bb.i6.i ] ; <%struct.bnode**> [#uses=1] - %q_addr.0.i.i = load %struct.bnode** %q_addr.0.i.i.in ; <%struct.bnode*> [#uses=0] + %q_addr.0.i.i = load %struct.bnode** %q_addr.0.i.i.in ; <%struct.bnode*> [#uses=1] + %q_addr.1 = getelementptr %struct.anon* %0, i32 0, i32 4, i32 1 + store %struct.bnode* %q_addr.0.i.i, %struct.bnode** %q_addr.1, align 4 br label %bb107.i.i bb47.loopexit.i: ; preds = %bb32.i diff --git a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll index 721d4c945b..8e315f4d80 100644 --- a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll +++ b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll @@ -35,7 +35,7 @@ 
cond_next36.i: ; preds = %cond_next.i bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i ; CHECK: %bb.i28.i ; CHECK: addl $2 -; CHECK: addl $2 +; CHECK: addl $-2 %j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2] %din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1] %tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2] diff --git a/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/test/CodeGen/X86/2008-02-22-ReMatBug.ll index 8d6bb0df1f..a91ac27f98 100644 --- a/test/CodeGen/X86/2008-02-22-ReMatBug.ll +++ b/test/CodeGen/X86/2008-02-22-ReMatBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 3 +; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 2 ; rdar://5761454 %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } diff --git a/test/CodeGen/X86/2008-07-11-SpillerBug.ll b/test/CodeGen/X86/2008-07-11-SpillerBug.ll index cd99c0e53c..548b44db6d 100644 --- a/test/CodeGen/X86/2008-07-11-SpillerBug.ll +++ b/test/CodeGen/X86/2008-07-11-SpillerBug.ll @@ -1,9 +1,7 @@ ; RUN: llc < %s -march=x86 -relocation-model=static -disable-fp-elim -post-RA-scheduler=false -asm-verbose=0 | FileCheck %s ; PR2536 - -; CHECK: movw %cx -; CHECK-NEXT: andl $65534, % +; CHECK: andl $65534, % ; CHECK-NEXT: movl % ; CHECK-NEXT: movl $17 diff --git a/test/CodeGen/X86/2008-08-05-SpillerBug.ll b/test/CodeGen/X86/2008-08-05-SpillerBug.ll index 67e14ffae5..4c6493445a 100644 --- a/test/CodeGen/X86/2008-08-05-SpillerBug.ll +++ b/test/CodeGen/X86/2008-08-05-SpillerBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 58 +; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 55 ; PR2568 @g_3 = 
external global i16 ; <i16*> [#uses=1] diff --git a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll index 72c7ee93a9..0dca14d064 100644 --- a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll +++ b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll @@ -3,74 +3,83 @@ ; PR3538 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9" - %llvm.dbg.anchor.type = type { i32, i32 } - %llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 } - %llvm.dbg.block.type = type { i32, { }* } - %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* } - %llvm.dbg.composite.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }*, { }* } - %llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 } - %llvm.dbg.subrange.type = type { i32, i64, i64 } - %llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* } -@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1] -@.str = internal constant [4 x i8] c"t.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1] -@.str1 = internal constant [2 x i8] c".\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1] -@.str2 = internal constant [6 x i8] c"clang\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1] -@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([2 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str2, i32 0, i32 0), i1 false, i1 false, i8* null }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -@.str3 = internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*> 
[#uses=1] -@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1] -@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1] -@.str4 = internal constant [5 x i8] c"test\00", section "llvm.metadata" ; <[5 x i8]*> [#uses=1] -@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([5 x i8]* @.str4, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str4, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 3, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -@.str5 = internal constant [2 x i8] c"X\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1] -@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([2 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 3, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1] -@llvm.dbg.block = internal constant %llvm.dbg.block.type { i32 458763, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*) }, section "llvm.metadata" ; <%llvm.dbg.block.type*> [#uses=1] -@llvm.dbg.subrange = internal constant 
%llvm.dbg.subrange.type { i32 458785, i64 0, i64 0 }, section "llvm.metadata" ; <%llvm.dbg.subrange.type*> [#uses=1] -@llvm.dbg.array = internal constant [1 x { }*] [{ }* bitcast (%llvm.dbg.subrange.type* @llvm.dbg.subrange to { }*)], section "llvm.metadata" ; <[1 x { }*]*> [#uses=1] -@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458753, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 0, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast ([1 x { }*]* @llvm.dbg.array to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -@.str6 = internal constant [2 x i8] c"Y\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1] -@llvm.dbg.variable7 = internal constant %llvm.dbg.variable.type { i32 459008, { }* bitcast (%llvm.dbg.block.type* @llvm.dbg.block to { }*), i8* getelementptr ([2 x i8]* @.str6, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1] - -define i32 @test(i32 %X) nounwind { +define signext i8 @foo(i8* %s1) nounwind ssp { entry: - %retval = alloca i32 ; <i32*> [#uses=1] - %X.addr = alloca i32 ; <i32*> [#uses=3] - %saved_stack = alloca i8* ; <i8**> [#uses=2] - call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*)) - store i32 %X, i32* %X.addr - %0 = bitcast i32* %X.addr to { }* ; <{ }*> [#uses=1] - call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable to { }*)) - call void @llvm.dbg.region.start({ }* bitcast (%llvm.dbg.block.type* @llvm.dbg.block to { }*)) - call void @llvm.dbg.stoppoint(i32 4, i32 3, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) - %1 = call i8* @llvm.stacksave() ; <i8*> [#uses=1] - store i8* %1, 
i8** %saved_stack - %tmp = load i32* %X.addr ; <i32> [#uses=1] - %2 = mul i32 4, %tmp ; <i32> [#uses=1] - %vla = alloca i8, i32 %2 ; <i8*> [#uses=1] - %tmp1 = bitcast i8* %vla to i32* ; <i32*> [#uses=1] - %3 = bitcast i32* %tmp1 to { }* ; <{ }*> [#uses=1] - call void @llvm.dbg.declare({ }* %3, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable7 to { }*)) - call void @llvm.dbg.stoppoint(i32 5, i32 1, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) - call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.block.type* @llvm.dbg.block to { }*)) - br label %cleanup + %s1_addr = alloca i8* ; <i8**> [#uses=2] + %retval = alloca i32 ; <i32*> [#uses=2] + %saved_stack.1 = alloca i8* ; <i8**> [#uses=2] + %0 = alloca i32 ; <i32*> [#uses=2] + %str.0 = alloca [0 x i8]* ; <[0 x i8]**> [#uses=3] + %1 = alloca i64 ; <i64*> [#uses=2] + %2 = alloca i64 ; <i64*> [#uses=1] + %3 = alloca i64 ; <i64*> [#uses=6] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.declare(metadata !{i8** %s1_addr}, metadata !0), !dbg !7 + store i8* %s1, i8** %s1_addr + call void @llvm.dbg.declare(metadata !{[0 x i8]** %str.0}, metadata !8), !dbg !7 + %4 = call i8* @llvm.stacksave(), !dbg !7 ; <i8*> [#uses=1] + store i8* %4, i8** %saved_stack.1, align 8, !dbg !7 + %5 = load i8** %s1_addr, align 8, !dbg !13 ; <i8*> [#uses=1] + %6 = call i64 @strlen(i8* %5) nounwind readonly, !dbg !13 ; <i64> [#uses=1] + %7 = add i64 %6, 1, !dbg !13 ; <i64> [#uses=1] + store i64 %7, i64* %3, align 8, !dbg !13 + %8 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1] + %9 = sub nsw i64 %8, 1, !dbg !13 ; <i64> [#uses=0] + %10 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1] + %11 = mul i64 %10, 8, !dbg !13 ; <i64> [#uses=0] + %12 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1] + store i64 %12, i64* %2, align 8, !dbg !13 + %13 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1] + %14 = mul i64 %13, 8, !dbg !13 ; <i64> [#uses=0] + %15 = load i64* %3, align 8, 
!dbg !13 ; <i64> [#uses=1] + store i64 %15, i64* %1, align 8, !dbg !13 + %16 = load i64* %1, align 8, !dbg !13 ; <i64> [#uses=1] + %17 = trunc i64 %16 to i32, !dbg !13 ; <i32> [#uses=1] + %18 = alloca i8, i32 %17, !dbg !13 ; <i8*> [#uses=1] + %19 = bitcast i8* %18 to [0 x i8]*, !dbg !13 ; <[0 x i8]*> [#uses=1] + store [0 x i8]* %19, [0 x i8]** %str.0, align 8, !dbg !13 + %20 = load [0 x i8]** %str.0, align 8, !dbg !15 ; <[0 x i8]*> [#uses=1] + %21 = getelementptr inbounds [0 x i8]* %20, i64 0, i64 0, !dbg !15 ; <i8*> [#uses=1] + store i8 0, i8* %21, align 1, !dbg !15 + %22 = load [0 x i8]** %str.0, align 8, !dbg !16 ; <[0 x i8]*> [#uses=1] + %23 = getelementptr inbounds [0 x i8]* %22, i64 0, i64 0, !dbg !16 ; <i8*> [#uses=1] + %24 = load i8* %23, align 1, !dbg !16 ; <i8> [#uses=1] + %25 = sext i8 %24 to i32, !dbg !16 ; <i32> [#uses=1] + store i32 %25, i32* %0, align 4, !dbg !16 + %26 = load i8** %saved_stack.1, align 8, !dbg !16 ; <i8*> [#uses=1] + call void @llvm.stackrestore(i8* %26), !dbg !16 + %27 = load i32* %0, align 4, !dbg !16 ; <i32> [#uses=1] + store i32 %27, i32* %retval, align 4, !dbg !16 + br label %return, !dbg !16 -cleanup: ; preds = %entry - %tmp2 = load i8** %saved_stack ; <i8*> [#uses=1] - call void @llvm.stackrestore(i8* %tmp2) - call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*)) - %4 = load i32* %retval ; <i32> [#uses=1] - ret i32 %4 +return: ; preds = %entry + %retval1 = load i32* %retval, !dbg !16 ; <i32> [#uses=1] + %retval12 = trunc i32 %retval1 to i8, !dbg !16 ; <i8> [#uses=1] + ret i8 %retval12, !dbg !16 } -declare void @llvm.dbg.func.start({ }*) nounwind - -declare void @llvm.dbg.declare({ }*, { }*) nounwind - -declare void @llvm.dbg.region.start({ }*) nounwind - -declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone declare i8* @llvm.stacksave() nounwind +declare i64 @strlen(i8*) nounwind readonly + declare void 
@llvm.stackrestore(i8*) nounwind -declare void @llvm.dbg.region.end({ }*) nounwind +!0 = metadata !{i32 459009, metadata !1, metadata !"s1", metadata !2, i32 2, metadata !6} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 458798, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 458769, i32 0, i32 1, metadata !"vla.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 458773, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5, metadata !6} +!5 = metadata !{i32 458788, metadata !2, metadata !"char", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 458767, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 2, i32 0, metadata !1, null} +!8 = metadata !{i32 459008, metadata !1, metadata !"str.0", metadata !2, i32 3, metadata !9} ; [ DW_TAG_auto_variable ] +!9 = metadata !{i32 458767, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ] +!10 = metadata !{i32 458753, metadata !2, metadata !"", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !5, metadata !11, i32 0, null} ; [ DW_TAG_array_type ] +!11 = metadata !{metadata !12} +!12 = metadata !{i32 458785, i64 0, i64 0} ; [ DW_TAG_subrange_type ] +!13 = metadata !{i32 3, i32 0, metadata !14, null} +!14 = metadata !{i32 458763, metadata !1, i32 0, i32 0} ; [ DW_TAG_lexical_block ] +!15 = metadata !{i32 4, i32 0, metadata !14, null} +!16 = metadata !{i32 5, i32 0, metadata !14, null} diff --git a/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll 
b/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll deleted file mode 100644 index e21c8923df..0000000000 --- a/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll +++ /dev/null @@ -1,102 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s - -; CHECK: _foo: -; CHECK: pavgw LCPI1_4(%rip) - -; rdar://7057804 - -define void @foo(i16* %out8x8, i16* %in8x8, i32 %lastrow) optsize ssp { -entry: - %0 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2] - %1 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %0, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %2 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - %3 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %2, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i10 = add <8 x i16> %0, %3 ; <<8 x i16>> [#uses=1] - %4 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> zeroinitializer, <8 x i16> %1) nounwind readnone ; <<8 x i16>> [#uses=1] - %5 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i10, <8 x i16> %4) nounwind readnone ; <<8 x i16>> [#uses=3] - %6 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %7 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2] - %8 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %7, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %9 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - 
%10 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %9, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i8 = add <8 x i16> %7, %10 ; <<8 x i16>> [#uses=1] - %11 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %8) nounwind readnone ; <<8 x i16>> [#uses=1] - %12 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i8, <8 x i16> %11) nounwind readnone ; <<8 x i16>> [#uses=1] - %13 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %14 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %15 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %16 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %6, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %17 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %12, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %18 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %13, <8 x i16> %15) nounwind readnone ; <<8 x i16>> [#uses=1] - %19 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %14) nounwind readnone ; <<8 x i16>> [#uses=2] - %20 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=4] - %21 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %17) nounwind readnone ; <<8 x i16>> [#uses=1] - %22 = bitcast <8 x i16> %21 to <2 x i64> ; <<2 x i64>> [#uses=1] - %23 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2] - %24 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %23, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %25 = call <8 
x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - %26 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %25, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i6 = add <8 x i16> %23, %26 ; <<8 x i16>> [#uses=1] - %27 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %24) nounwind readnone ; <<8 x i16>> [#uses=1] - %28 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i6, <8 x i16> %27) nounwind readnone ; <<8 x i16>> [#uses=1] - %29 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2] - %30 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %29, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %31 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - %32 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %31, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i4 = add <8 x i16> %29, %32 ; <<8 x i16>> [#uses=1] - %33 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %30) nounwind readnone ; <<8 x i16>> [#uses=1] - %34 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i4, <8 x i16> %33) nounwind readnone ; <<8 x i16>> [#uses=1] - %35 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>, <8 x i16> %20) nounwind readnone ; <<8 x i16>> [#uses=2] - %tmp.i2.i1 = mul <8 x i16> %20, <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170> ; <<8 x i16>> [#uses=1] - %36 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %35, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 
16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %37 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i1, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - %38 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %37, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i2 = add <8 x i16> %35, %38 ; <<8 x i16>> [#uses=1] - %39 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %36) nounwind readnone ; <<8 x i16>> [#uses=1] - %40 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i2, <8 x i16> %39) nounwind readnone ; <<8 x i16>> [#uses=1] - %41 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>, <8 x i16> %20) nounwind readnone ; <<8 x i16>> [#uses=2] - %tmp.i2.i = mul <8 x i16> %20, <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170> ; <<8 x i16>> [#uses=1] - %42 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %41, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %43 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - %44 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %43, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i = add <8 x i16> %41, %44 ; <<8 x i16>> [#uses=1] - %45 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %42) nounwind readnone ; <<8 x i16>> [#uses=1] - %46 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i, <8 x i16> %45) nounwind readnone ; <<8 x i16>> [#uses=1] - %47 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %18, <8 x i16> %16) nounwind readnone ; <<8 x i16>> [#uses=1] - %48 = bitcast <8 x i16> %47 to <2 x i64> ; <<2 x i64>> [#uses=1] - %49 = bitcast <8 x i16> %28 to <2 x i64> ; <<2 x i64>> [#uses=1] - %50 = 
getelementptr i16* %out8x8, i64 8 ; <i16*> [#uses=1] - %51 = bitcast i16* %50 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %49, <2 x i64>* %51, align 16 - %52 = bitcast <8 x i16> %40 to <2 x i64> ; <<2 x i64>> [#uses=1] - %53 = getelementptr i16* %out8x8, i64 16 ; <i16*> [#uses=1] - %54 = bitcast i16* %53 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %52, <2 x i64>* %54, align 16 - %55 = getelementptr i16* %out8x8, i64 24 ; <i16*> [#uses=1] - %56 = bitcast i16* %55 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %48, <2 x i64>* %56, align 16 - %57 = bitcast <8 x i16> %46 to <2 x i64> ; <<2 x i64>> [#uses=1] - %58 = getelementptr i16* %out8x8, i64 40 ; <i16*> [#uses=1] - %59 = bitcast i16* %58 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %57, <2 x i64>* %59, align 16 - %60 = bitcast <8 x i16> %34 to <2 x i64> ; <<2 x i64>> [#uses=1] - %61 = getelementptr i16* %out8x8, i64 48 ; <i16*> [#uses=1] - %62 = bitcast i16* %61 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %60, <2 x i64>* %62, align 16 - %63 = getelementptr i16* %out8x8, i64 56 ; <i16*> [#uses=1] - %64 = bitcast i16* %63 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %22, <2 x i64>* %64, align 16 - ret void -} - -declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone - -declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone - -declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone - -declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone - -declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone - -declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone diff --git a/test/CodeGen/X86/2009-09-07-CoalescerBug.ll b/test/CodeGen/X86/2009-09-07-CoalescerBug.ll index a5b4a79401..41b4bc0872 100644 --- a/test/CodeGen/X86/2009-09-07-CoalescerBug.ll +++ b/test/CodeGen/X86/2009-09-07-CoalescerBug.ll @@ -8,8 +8,7 
@@ define i64 @hammer_time(i64 %modulep, i64 %physfree) nounwind ssp noredzone noimplicitfloat { ; CHECK: hammer_time: ; CHECK: movq $Xrsvd, %rax -; CHECK: movq $Xrsvd, %rsi -; CHECK: movq $Xrsvd, %rdi +; CHECK: movq $Xrsvd, %rcx entry: br i1 undef, label %if.then, label %if.end diff --git a/test/CodeGen/X86/2010-02-11-NonTemporal.ll b/test/CodeGen/X86/2010-02-11-NonTemporal.ll new file mode 100644 index 0000000000..5789a0b984 --- /dev/null +++ b/test/CodeGen/X86/2010-02-11-NonTemporal.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; CHECK: movnt +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +!0 = metadata !{ i32 1 } + +define void @sub_(i32* noalias %n) { +"file movnt.f90, line 2, bb1": + %n1 = alloca i32*, align 8 + %i = alloca i32, align 4 + %"$LCS_0" = alloca i64, align 8 + %"$LCS_S2" = alloca <2 x double>, align 16 + %r9 = load <2 x double>* %"$LCS_S2", align 8 + %r10 = load i64* %"$LCS_0", align 8 + %r11 = inttoptr i64 %r10 to <2 x double>* + store <2 x double> %r9, <2 x double>* %r11, align 16, !nontemporal !0 + br label %"file movnt.f90, line 18, bb5" + +"file movnt.f90, line 18, bb5": + ret void +} diff --git a/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll b/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll new file mode 100644 index 0000000000..c5d3d16f81 --- /dev/null +++ b/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll @@ -0,0 +1,260 @@ +; RUN: llc < %s > %t +; PR6283 + +; Tricky coalescer bug: +; After coalescing %RAX with a virtual register, this instruction was rematted: +; +; %EAX<def> = MOV32rr %reg1070<kill> +; +; This instruction silently defined %RAX, and when rematting removed the +; instruction, the live interval for %RAX was not properly updated. The valno +; referred to a deleted instruction and bad things happened. 
+; +; The fix is to implicitly define %RAX when coalescing: +; +; %EAX<def> = MOV32rr %reg1070<kill>, %RAX<imp-def> +; + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +module asm "\09.ident\09\22GCC: (GNU) 4.5.0 20100212 (experimental) LLVM: 95975\22" + +%0 = type { %"union gimple_statement_d"* } +%"BITMAP_WORD[]" = type [2 x i64] +%"char[]" = type [4 x i8] +%"enum dom_state[]" = type [2 x i32] +%"int[]" = type [4 x i32] +%"struct VEC_basic_block_base" = type { i32, i32, [1 x %"struct basic_block_def"*] } +%"struct VEC_basic_block_gc" = type { %"struct VEC_basic_block_base" } +%"struct VEC_edge_base" = type { i32, i32, [1 x %"struct edge_def"*] } +%"struct VEC_edge_gc" = type { %"struct VEC_edge_base" } +%"struct VEC_gimple_base" = type { i32, i32, [1 x %"union gimple_statement_d"*] } +%"struct VEC_gimple_gc" = type { %"struct VEC_gimple_base" } +%"struct VEC_iv_cand_p_base" = type { i32, i32, [1 x %"struct iv_cand"*] } +%"struct VEC_iv_cand_p_heap" = type { %"struct VEC_iv_cand_p_base" } +%"struct VEC_iv_use_p_base" = type { i32, i32, [1 x %"struct iv_use"*] } +%"struct VEC_iv_use_p_heap" = type { %"struct VEC_iv_use_p_base" } +%"struct VEC_loop_p_base" = type { i32, i32, [1 x %"struct loop"*] } +%"struct VEC_loop_p_gc" = type { %"struct VEC_loop_p_base" } +%"struct VEC_rtx_base" = type { i32, i32, [1 x %"struct rtx_def"*] } +%"struct VEC_rtx_gc" = type { %"struct VEC_rtx_base" } +%"struct VEC_tree_base" = type { i32, i32, [1 x %"union tree_node"*] } +%"struct VEC_tree_gc" = type { %"struct VEC_tree_base" } +%"struct _obstack_chunk" = type { i8*, %"struct _obstack_chunk"*, %"char[]" } +%"struct basic_block_def" = type { %"struct VEC_edge_gc"*, %"struct VEC_edge_gc"*, i8*, %"struct loop"*, [2 x %"struct et_node"*], %"struct basic_block_def"*, %"struct basic_block_def"*, %"union basic_block_il_dependent", 
i64, i32, i32, i32, i32, i32 } +%"struct bitmap_element" = type { %"struct bitmap_element"*, %"struct bitmap_element"*, i32, %"BITMAP_WORD[]" } +%"struct bitmap_head_def" = type { %"struct bitmap_element"*, %"struct bitmap_element"*, i32, %"struct bitmap_obstack"* } +%"struct bitmap_obstack" = type { %"struct bitmap_element"*, %"struct bitmap_head_def"*, %"struct obstack" } +%"struct block_symbol" = type { [3 x %"union rtunion"], %"struct object_block"*, i64 } +%"struct comp_cost" = type { i32, i32 } +%"struct control_flow_graph" = type { %"struct basic_block_def"*, %"struct basic_block_def"*, %"struct VEC_basic_block_gc"*, i32, i32, i32, %"struct VEC_basic_block_gc"*, i32, %"enum dom_state[]", %"enum dom_state[]", i32, i32 } +%"struct cost_pair" = type { %"struct iv_cand"*, %"struct comp_cost", %"struct bitmap_head_def"*, %"union tree_node"* } +%"struct def_optype_d" = type { %"struct def_optype_d"*, %"union tree_node"** } +%"struct double_int" = type { i64, i64 } +%"struct edge_def" = type { %"struct basic_block_def"*, %"struct basic_block_def"*, %"union edge_def_insns", i8*, %"union tree_node"*, i32, i32, i32, i32, i64 } +%"struct eh_status" = type opaque +%"struct et_node" = type opaque +%"struct function" = type { %"struct eh_status"*, %"struct control_flow_graph"*, %"struct gimple_seq_d"*, %"struct gimple_df"*, %"struct loops"*, %"struct htab"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"struct machine_function"*, %"struct language_function"*, %"struct htab"*, i32, i32, i32, i32, i32, i32, i8*, i8, i8, i8, i8 } +%"struct gimple_bb_info" = type { %"struct gimple_seq_d"*, %"struct gimple_seq_d"* } +%"struct gimple_df" = type { %"struct htab"*, %"struct VEC_gimple_gc"*, %"struct VEC_tree_gc"*, %"union tree_node"*, %"struct pt_solution", %"struct pt_solution", %"struct pointer_map_t"*, %"union tree_node"*, %"struct htab"*, %"struct bitmap_head_def"*, i8, %"struct ssa_operands" } +%"struct gimple_seq_d" = type { %"struct 
gimple_seq_node_d"*, %"struct gimple_seq_node_d"*, %"struct gimple_seq_d"* } +%"struct gimple_seq_node_d" = type { %"union gimple_statement_d"*, %"struct gimple_seq_node_d"*, %"struct gimple_seq_node_d"* } +%"struct gimple_statement_base" = type { i8, i8, i16, i32, i32, i32, %"struct basic_block_def"*, %"union tree_node"* } +%"struct gimple_statement_phi" = type { %"struct gimple_statement_base", i32, i32, %"union tree_node"*, %"struct phi_arg_d[]" } +%"struct htab" = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i64, i64, i64, i32, i32, i8* (i64, i64)*, void (i8*)*, i8*, i8* (i8*, i64, i64)*, void (i8*, i8*)*, i32 } +%"struct iv" = type { %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, i8, i8, i32 } +%"struct iv_cand" = type { i32, i8, i32, %"union gimple_statement_d"*, %"union tree_node"*, %"union tree_node"*, %"struct iv"*, i32, i32, %"struct iv_use"*, %"struct bitmap_head_def"* } +%"struct iv_use" = type { i32, i32, %"struct iv"*, %"union gimple_statement_d"*, %"union tree_node"**, %"struct bitmap_head_def"*, i32, %"struct cost_pair"*, %"struct iv_cand"* } +%"struct ivopts_data" = type { %"struct loop"*, %"struct pointer_map_t"*, i32, i32, %"struct version_info"*, %"struct bitmap_head_def"*, %"struct VEC_iv_use_p_heap"*, %"struct VEC_iv_cand_p_heap"*, %"struct bitmap_head_def"*, i32, i8, i8 } +%"struct lang_decl" = type opaque +%"struct language_function" = type opaque +%"struct loop" = type { i32, i32, %"struct basic_block_def"*, %"struct basic_block_def"*, %"struct comp_cost", i32, i32, %"struct VEC_loop_p_gc"*, %"struct loop"*, %"struct loop"*, i8*, %"union tree_node"*, %"struct double_int", %"struct double_int", i8, i8, i32, %"struct nb_iter_bound"*, %"struct loop_exit"*, i8, %"union tree_node"* } +%"struct loop_exit" = type { %"struct edge_def"*, %"struct loop_exit"*, %"struct loop_exit"*, %"struct loop_exit"* } +%"struct loops" = type { i32, %"struct VEC_loop_p_gc"*, %"struct htab"*, %"struct loop"* } +%"struct 
machine_cfa_state" = type { %"struct rtx_def"*, i64 } +%"struct machine_function" = type { %"struct stack_local_entry"*, i8*, i32, i32, %"int[]", i32, %"struct machine_cfa_state", i32, i8 } +%"struct nb_iter_bound" = type { %"union gimple_statement_d"*, %"struct double_int", i8, %"struct nb_iter_bound"* } +%"struct object_block" = type { %"union section"*, i32, i64, %"struct VEC_rtx_gc"*, %"struct VEC_rtx_gc"* } +%"struct obstack" = type { i64, %"struct _obstack_chunk"*, i8*, i8*, i8*, i64, i32, %"struct _obstack_chunk"* (i8*, i64)*, void (i8*, %"struct _obstack_chunk"*)*, i8*, i8 } +%"struct phi_arg_d" = type { %"struct ssa_use_operand_d", %"union tree_node"*, i32 } +%"struct phi_arg_d[]" = type [1 x %"struct phi_arg_d"] +%"struct pointer_map_t" = type opaque +%"struct pt_solution" = type { i8, %"struct bitmap_head_def"* } +%"struct rtx_def" = type { i16, i8, i8, %"union u" } +%"struct section_common" = type { i32 } +%"struct ssa_operand_memory_d" = type { %"struct ssa_operand_memory_d"*, %"uchar[]" } +%"struct ssa_operands" = type { %"struct ssa_operand_memory_d"*, i32, i32, i8, %"struct def_optype_d"*, %"struct use_optype_d"* } +%"struct ssa_use_operand_d" = type { %"struct ssa_use_operand_d"*, %"struct ssa_use_operand_d"*, %0, %"union tree_node"** } +%"struct stack_local_entry" = type opaque +%"struct tree_base" = type <{ i16, i8, i8, i8, [2 x i8], i8 }> +%"struct tree_common" = type { %"struct tree_base", %"union tree_node"*, %"union tree_node"* } +%"struct tree_decl_common" = type { %"struct tree_decl_minimal", %"union tree_node"*, i8, i8, i8, i8, i8, i32, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"struct lang_decl"* } +%"struct tree_decl_minimal" = type { %"struct tree_common", i32, i32, %"union tree_node"*, %"union tree_node"* } +%"struct tree_decl_non_common" = type { %"struct tree_decl_with_vis", %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"* } +%"struct tree_decl_with_rtl" = 
type { %"struct tree_decl_common", %"struct rtx_def"* } +%"struct tree_decl_with_vis" = type { %"struct tree_decl_with_rtl", %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, i8, i8, i8 } +%"struct tree_function_decl" = type { %"struct tree_decl_non_common", %"struct function"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, i16, i8, i8 } +%"struct unnamed_section" = type { %"struct section_common", void (i8*)*, i8*, %"union section"* } +%"struct use_optype_d" = type { %"struct use_optype_d"*, %"struct ssa_use_operand_d" } +%"struct version_info" = type { %"union tree_node"*, %"struct iv"*, i8, i32, i8 } +%"uchar[]" = type [1 x i8] +%"union basic_block_il_dependent" = type { %"struct gimple_bb_info"* } +%"union edge_def_insns" = type { %"struct gimple_seq_d"* } +%"union gimple_statement_d" = type { %"struct gimple_statement_phi" } +%"union rtunion" = type { i8* } +%"union section" = type { %"struct unnamed_section" } +%"union tree_node" = type { %"struct tree_function_decl" } +%"union u" = type { %"struct block_symbol" } + +declare fastcc %"union tree_node"* @get_computation_at(%"struct loop"*, %"struct iv_use"* nocapture, %"struct iv_cand"* nocapture, %"union gimple_statement_d"*) nounwind + +declare fastcc i32 @computation_cost(%"union tree_node"*, i8 zeroext) nounwind + +define fastcc i64 @get_computation_cost_at(%"struct ivopts_data"* %data, %"struct iv_use"* nocapture %use, %"struct iv_cand"* nocapture %cand, i8 zeroext %address_p, %"struct bitmap_head_def"** %depends_on, %"union gimple_statement_d"* %at, i8* %can_autoinc) nounwind { +entry: + br i1 undef, label %"100", label %"4" + +"4": ; preds = %entry + br i1 undef, label %"6", label %"5" + +"5": ; preds = %"4" + unreachable + +"6": ; preds = %"4" + br i1 undef, label %"8", label %"7" + +"7": ; preds = %"6" + unreachable + +"8": ; preds = %"6" + br i1 undef, label %"100", label %"10" + +"10": ; preds = %"8" + br i1 undef, label %"17", label %"16" + +"16": ; preds = %"10" + 
unreachable + +"17": ; preds = %"10" + br i1 undef, label %"19", label %"18" + +"18": ; preds = %"17" + unreachable + +"19": ; preds = %"17" + br i1 undef, label %"93", label %"20" + +"20": ; preds = %"19" + br i1 undef, label %"23", label %"21" + +"21": ; preds = %"20" + unreachable + +"23": ; preds = %"20" + br i1 undef, label %"100", label %"25" + +"25": ; preds = %"23" + br i1 undef, label %"100", label %"26" + +"26": ; preds = %"25" + br i1 undef, label %"30", label %"28" + +"28": ; preds = %"26" + unreachable + +"30": ; preds = %"26" + br i1 undef, label %"59", label %"51" + +"51": ; preds = %"30" + br i1 undef, label %"55", label %"52" + +"52": ; preds = %"51" + unreachable + +"55": ; preds = %"51" + %0 = icmp ugt i32 0, undef ; <i1> [#uses=1] + br i1 %0, label %"50.i", label %"9.i" + +"9.i": ; preds = %"55" + unreachable + +"50.i": ; preds = %"55" + br i1 undef, label %"55.i", label %"54.i" + +"54.i": ; preds = %"50.i" + br i1 undef, label %"57.i", label %"55.i" + +"55.i": ; preds = %"54.i", %"50.i" + unreachable + +"57.i": ; preds = %"54.i" + br label %"63.i" + +"61.i": ; preds = %"63.i" + br i1 undef, label %"64.i", label %"62.i" + +"62.i": ; preds = %"61.i" + br label %"63.i" + +"63.i": ; preds = %"62.i", %"57.i" + br i1 undef, label %"61.i", label %"64.i" + +"64.i": ; preds = %"63.i", %"61.i" + unreachable + +"59": ; preds = %"30" + br i1 undef, label %"60", label %"82" + +"60": ; preds = %"59" + br i1 undef, label %"61", label %"82" + +"61": ; preds = %"60" + br i1 undef, label %"62", label %"82" + +"62": ; preds = %"61" + br i1 undef, label %"100", label %"63" + +"63": ; preds = %"62" + br i1 undef, label %"65", label %"64" + +"64": ; preds = %"63" + unreachable + +"65": ; preds = %"63" + br i1 undef, label %"66", label %"67" + +"66": ; preds = %"65" + unreachable + +"67": ; preds = %"65" + %1 = load i32* undef, align 4 ; <i32> [#uses=0] + br label %"100" + +"82": ; preds = %"61", %"60", %"59" + unreachable + +"93": ; preds = %"19" + %2 = call fastcc 
%"union tree_node"* @get_computation_at(%"struct loop"* undef, %"struct iv_use"* %use, %"struct iv_cand"* %cand, %"union gimple_statement_d"* %at) nounwind ; <%"union tree_node"*> [#uses=1] + br i1 undef, label %"100", label %"97" + +"97": ; preds = %"93" + br i1 undef, label %"99", label %"98" + +"98": ; preds = %"97" + br label %"99" + +"99": ; preds = %"98", %"97" + %3 = phi %"union tree_node"* [ undef, %"98" ], [ %2, %"97" ] ; <%"union tree_node"*> [#uses=1] + %4 = call fastcc i32 @computation_cost(%"union tree_node"* %3, i8 zeroext undef) nounwind ; <i32> [#uses=1] + br label %"100" + +"100": ; preds = %"99", %"93", %"67", %"62", %"25", %"23", %"8", %entry + %memtmp1.1.0 = phi i32 [ 0, %"99" ], [ 10000000, %entry ], [ 10000000, %"8" ], [ 10000000, %"23" ], [ 10000000, %"25" ], [ undef, %"62" ], [ undef, %"67" ], [ 10000000, %"93" ] ; <i32> [#uses=1] + %memtmp1.0.0 = phi i32 [ %4, %"99" ], [ 10000000, %entry ], [ 10000000, %"8" ], [ 10000000, %"23" ], [ 10000000, %"25" ], [ undef, %"62" ], [ undef, %"67" ], [ 10000000, %"93" ] ; <i32> [#uses=1] + %5 = zext i32 %memtmp1.0.0 to i64 ; <i64> [#uses=1] + %6 = zext i32 %memtmp1.1.0 to i64 ; <i64> [#uses=1] + %7 = shl i64 %6, 32 ; <i64> [#uses=1] + %8 = or i64 %7, %5 ; <i64> [#uses=1] + ret i64 %8 +} diff --git a/test/CodeGen/X86/2010-02-15-ImplicitDefBug.ll b/test/CodeGen/X86/2010-02-15-ImplicitDefBug.ll new file mode 100644 index 0000000000..c429172852 --- /dev/null +++ b/test/CodeGen/X86/2010-02-15-ImplicitDefBug.ll @@ -0,0 +1,80 @@ +; RUN: llc < %s > %t +; PR6300 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +; When the "154" loops back onto itself, it defines a register after using it. +; The first value of the register is implicit-def. 
+ +%"struct location_chain_def" = type { %"struct location_chain_def"*, %"struct rtx_def"*, %"struct rtx_def"*, i32 } +%"struct real_value" = type { i32, [5 x i32] } +%"struct rtx_def" = type { i16, i8, i8, %"union u" } +%"union u" = type { %"struct real_value" } + +define i32 @variable_union(i8** nocapture %slot, i8* nocapture %data) nounwind { +entry: + br i1 undef, label %"4.thread", label %"3" + +"4.thread": ; preds = %entry + unreachable + +"3": ; preds = %entry + br i1 undef, label %"19", label %"20" + +"19": ; preds = %"3" + unreachable + +"20": ; preds = %"3" + br i1 undef, label %"56.preheader", label %dv_onepart_p.exit + +dv_onepart_p.exit: ; preds = %"20" + unreachable + +"56.preheader": ; preds = %"20" + br label %"56" + +"50": ; preds = %"57" + br label %"56" + +"56": ; preds = %"50", %"56.preheader" + br i1 undef, label %"57", label %"58" + +"57": ; preds = %"56" + br i1 undef, label %"50", label %"58" + +"58": ; preds = %"57", %"56" + br i1 undef, label %"62", label %"63" + +"62": ; preds = %"58" + unreachable + +"63": ; preds = %"58" + br i1 undef, label %"67", label %"66" + +"66": ; preds = %"63" + br label %"67" + +"67": ; preds = %"66", %"63" + br label %"68" + +"68": ; preds = %"161", %"67" + br i1 undef, label %"153", label %"161" + +"153": ; preds = %"68" + br i1 undef, label %"160", label %bb.nph46 + +bb.nph46: ; preds = %"153" + br label %"154" + +"154": ; preds = %"154", %bb.nph46 + %0 = phi %"struct location_chain_def"** [ undef, %bb.nph46 ], [ %1, %"154" ] ; <%"struct location_chain_def"**> [#uses=1] + %1 = bitcast i8* undef to %"struct location_chain_def"** ; <%"struct location_chain_def"**> [#uses=1] + store %"struct location_chain_def"* undef, %"struct location_chain_def"** %0, align 4 + br i1 undef, label %"160", label %"154" + +"160": ; preds = %"154", %"153" + br label %"161" + +"161": ; preds = %"160", %"68" + br label %"68" +} diff --git a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll 
b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll new file mode 100644 index 0000000000..eb21dc234a --- /dev/null +++ b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll @@ -0,0 +1,55 @@ +; RUN: llc -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s +; Check that lowered arguments do not overwrite the return address before it is moved. +; Bug 6225 +; +; If a call is a fastcc tail call and tail call optimization is enabled, the +; caller frame is replaced by the callee frame. This can require that arguments are +; placed on the former return address stack slot. Special care needs to be taken +; that the return address is moved / or stored in a register before +; lowering of arguments potentially overwrites the value. +; +; Move return address (76(%esp)) to a temporary register (%ebp) +; CHECK: movl 76(%esp), %ebp +; Overwrite return address +; CHECK: movl %ecx, 76(%esp) +; Move return address from temporary register (%ebp) to new stack location (60(%esp)) +; CHECK: movl %ebp, 60(%esp) + +%tupl_p = type [9 x i32]* + +declare fastcc void @l297(i32 %r10, i32 %r9, i32 %r8, i32 %r7, i32 %r6, i32 %r5, i32 %r3, i32 %r2) noreturn nounwind +declare fastcc void @l298(i32 %r10, i32 %r9, i32 %r4) noreturn nounwind + +define fastcc void @l186(%tupl_p %r1) noreturn nounwind { +entry: + %ptr1 = getelementptr %tupl_p %r1, i32 0, i32 0 + %r2 = load i32* %ptr1 + %ptr3 = getelementptr %tupl_p %r1, i32 0, i32 1 + %r3 = load i32* %ptr3 + %ptr5 = getelementptr %tupl_p %r1, i32 0, i32 2 + %r4 = load i32* %ptr5 + %ptr7 = getelementptr %tupl_p %r1, i32 0, i32 3 + %r5 = load i32* %ptr7 + %ptr9 = getelementptr %tupl_p %r1, i32 0, i32 4 + %r6 = load i32* %ptr9 + %ptr11 = getelementptr %tupl_p %r1, i32 0, i32 5 + %r7 = load i32* %ptr11 + %ptr13 = getelementptr %tupl_p %r1, i32 0, i32 6 + %r8 = load i32* %ptr13 + %ptr15 = getelementptr %tupl_p %r1, i32 0, i32 7 + %r9 = load i32* %ptr15 + %ptr17 = getelementptr %tupl_p %r1, i32 0, i32 8 + %r10 = load i32* %ptr17 + %cond = icmp eq
i32 %r10, 3 + br i1 %cond, label %true, label %false + +true: + tail call fastcc void @l297(i32 %r10, i32 %r9, i32 %r8, i32 %r7, i32 %r6, i32 %r5, i32 %r3, i32 %r2) noreturn nounwind + ret void + +false: + tail call fastcc void @l298(i32 %r10, i32 %r9, i32 %r4) noreturn nounwind + ret void +} + + diff --git a/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll b/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll new file mode 100644 index 0000000000..6a58e9e551 --- /dev/null +++ b/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +define i32* @t() nounwind optsize ssp { +entry: +; CHECK: t: +; CHECK: testl %eax, %eax +; CHECK: js + %cmp = icmp slt i32 undef, 0 ; <i1> [#uses=1] + %outsearch.0 = select i1 %cmp, i1 false, i1 true ; <i1> [#uses=1] + br i1 %outsearch.0, label %if.then27, label %if.else29 + +if.then27: ; preds = %entry + ret i32* undef + +if.else29: ; preds = %entry + unreachable +} + diff --git a/test/CodeGen/X86/2010-02-23-DIV8rDefinesAX.ll b/test/CodeGen/X86/2010-02-23-DIV8rDefinesAX.ll new file mode 100644 index 0000000000..8543c80117 --- /dev/null +++ b/test/CodeGen/X86/2010-02-23-DIV8rDefinesAX.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s +; PR6374 +; +; This test produces a DIV8r instruction and uses %AX instead of %AH and %AL. +; The DIV8r must have the right imp-defs for that to work. 
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +%struct._i386_state = type { %union.anon } +%union.anon = type { [0 x i8] } + +define void @i386_aam(%struct._i386_state* nocapture %cpustate) nounwind ssp { +entry: + %call = tail call fastcc signext i8 @FETCH() ; <i8> [#uses=1] + %rem = urem i8 0, %call ; <i8> [#uses=1] + store i8 %rem, i8* undef + ret void +} + +declare fastcc signext i8 @FETCH() nounwind readnone ssp diff --git a/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll b/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll new file mode 100644 index 0000000000..4a26ba088e --- /dev/null +++ b/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll @@ -0,0 +1,49 @@ +; RUN: llc < %s +; PR6372 +; +; This test produces a move instruction with an implicitly defined super-register: +; +; %DL<def> = MOV8rr %reg1038<kill>, %RDX<imp-def> +; +; When %DL is rematerialized, we must remember to update live intervals for +; sub-registers %DX and %EDX. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define noalias i8* @foo() nounwind ssp { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %if.end40, %entry + %tmp6 = load i8* undef, align 2 ; <i8> [#uses=3] + %conv11 = sext i8 %tmp6 to i64 ; <i64> [#uses=1] + %cmp15 = icmp slt i64 %conv11, undef ; <i1> [#uses=1] + br i1 %cmp15, label %if.end, label %if.then + +if.then: ; preds = %for.body + %conv18 = sext i8 %tmp6 to i32 ; <i32> [#uses=1] + %call = tail call i32 (...)* @invalid(i32 0, i32 0, i32 %conv18) nounwind ; <i32> [#uses=0] + br label %if.end + +if.end: ; preds = %if.then, %for.body + %index.0 = phi i8 [ 0, %if.then ], [ %tmp6, %for.body ] ; <i8> [#uses=1] + store i8 %index.0, i8* undef + %tmp24 = load i8* undef ; <i8> [#uses=2] + br i1 undef, label %if.end40, label %if.then36 + +if.then36: ; preds = %if.end + %conv38 = sext i8 %tmp24 to i32 ; <i32> [#uses=1] + %call39 = tail call i32 (...)* @invalid(i32 0, i32 0, i32 %conv38) nounwind ; <i32> [#uses=0] + br label %if.end40 + +if.end40: ; preds = %if.then36, %if.end + %index.1 = phi i8 [ 0, %if.then36 ], [ %tmp24, %if.end ] ; <i8> [#uses=1] + store i8 %index.1, i8* undef + br i1 false, label %for.body, label %for.end + +for.end: ; preds = %if.end40, %entry + ret i8* undef +} + +declare i32 @invalid(...) diff --git a/test/CodeGen/X86/2010-02-23-SingleDefPhiJoin.ll b/test/CodeGen/X86/2010-02-23-SingleDefPhiJoin.ll new file mode 100644 index 0000000000..aeed401461 --- /dev/null +++ b/test/CodeGen/X86/2010-02-23-SingleDefPhiJoin.ll @@ -0,0 +1,146 @@ +; RUN: llc < %s +; PR6363 +; +; This test case creates a phi join register with a single definition. The other +; predecessor blocks are implicit-def. +; +; If LiveIntervalAnalysis fails to recognize this as a phi join, the coalescer +; will detect an infinity valno loop. 
+; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @decode(i8* nocapture %input, i32 %offset, i8* nocapture %output) nounwind { +entry: + br i1 undef, label %meshBB86, label %meshBB102 + +bb: ; preds = %meshBB106, %meshBB102 + br i1 false, label %bb9, label %meshBB90 + +bb.nph: ; preds = %meshBB90 + br label %meshBB114 + +bb.nph.fragment: ; preds = %meshBB114 + br label %meshBB118 + +bb1.fragment: ; preds = %meshBB118 + br i1 false, label %bb2, label %bb3 + +bb2: ; preds = %bb1.fragment + br label %meshBB74 + +bb2.fragment15: ; preds = %meshBB74 + br label %meshBB98 + +bb3: ; preds = %bb1.fragment + br i1 undef, label %meshBB, label %meshBB102 + +bb4: ; preds = %meshBB + br label %meshBB118 + +bb4.fragment: ; preds = %meshBB118 + br label %meshBB82 + +bb5: ; preds = %meshBB102, %meshBB82 + br i1 false, label %bb6, label %bb7 + +bb6: ; preds = %bb5 + br label %bb7 + +bb7: ; preds = %meshBB98, %bb6, %bb5 + br label %meshBB114 + +bb7.fragment: ; preds = %meshBB114 + br i1 undef, label %meshBB74, label %bb9 + +bb9: ; preds = %bb7.fragment, %bb + br label %bb1.i23 + +bb1.i23: ; preds = %meshBB110, %bb9 + br i1 undef, label %meshBB106, label %meshBB110 + +skip_to_newline.exit26: ; preds = %meshBB106 + br label %meshBB86 + +skip_to_newline.exit26.fragment: ; preds = %meshBB86 + br i1 false, label %meshBB90, label %meshBB106 + +bb11.fragment: ; preds = %meshBB90, %meshBB86 + br label %meshBB122 + +bb1.i: ; preds = %meshBB122, %meshBB + %ooffset.2.lcssa.phi.SV.phi203 = phi i32 [ 0, %meshBB122 ], [ %ooffset.2.lcssa.phi.SV.phi233, %meshBB ] ; <i32> [#uses=1] + br label %meshBB98 + +bb1.i.fragment: ; preds = %meshBB98 + br i1 undef, label %meshBB78, label %meshBB + +skip_to_newline.exit: ; preds = %meshBB78 + br i1 undef, label %bb12, label %meshBB110 + +bb12: ; preds = %skip_to_newline.exit + br label %meshBB94 + 
+bb12.fragment: ; preds = %meshBB94 + br i1 false, label %bb13, label %meshBB78 + +bb13: ; preds = %bb12.fragment + br label %meshBB82 + +bb13.fragment: ; preds = %meshBB82 + br i1 undef, label %meshBB94, label %meshBB122 + +bb14: ; preds = %meshBB94 + ret i32 %ooffset.2.lcssa.phi.SV.phi250 + +bb15: ; preds = %meshBB122, %meshBB110, %meshBB78 + unreachable + +meshBB: ; preds = %bb1.i.fragment, %bb3 + %ooffset.2.lcssa.phi.SV.phi233 = phi i32 [ undef, %bb3 ], [ %ooffset.2.lcssa.phi.SV.phi209, %bb1.i.fragment ] ; <i32> [#uses=1] + br i1 undef, label %bb1.i, label %bb4 + +meshBB74: ; preds = %bb7.fragment, %bb2 + br i1 false, label %meshBB118, label %bb2.fragment15 + +meshBB78: ; preds = %bb12.fragment, %bb1.i.fragment + %ooffset.2.lcssa.phi.SV.phi239 = phi i32 [ %ooffset.2.lcssa.phi.SV.phi209, %bb1.i.fragment ], [ %ooffset.2.lcssa.phi.SV.phi250, %bb12.fragment ] ; <i32> [#uses=1] + br i1 false, label %bb15, label %skip_to_newline.exit + +meshBB82: ; preds = %bb13, %bb4.fragment + br i1 false, label %bb5, label %bb13.fragment + +meshBB86: ; preds = %skip_to_newline.exit26, %entry + br i1 undef, label %skip_to_newline.exit26.fragment, label %bb11.fragment + +meshBB90: ; preds = %skip_to_newline.exit26.fragment, %bb + br i1 false, label %bb11.fragment, label %bb.nph + +meshBB94: ; preds = %bb13.fragment, %bb12 + %ooffset.2.lcssa.phi.SV.phi250 = phi i32 [ 0, %bb13.fragment ], [ %ooffset.2.lcssa.phi.SV.phi239, %bb12 ] ; <i32> [#uses=2] + br i1 false, label %bb12.fragment, label %bb14 + +meshBB98: ; preds = %bb1.i, %bb2.fragment15 + %ooffset.2.lcssa.phi.SV.phi209 = phi i32 [ undef, %bb2.fragment15 ], [ %ooffset.2.lcssa.phi.SV.phi203, %bb1.i ] ; <i32> [#uses=2] + br i1 undef, label %bb1.i.fragment, label %bb7 + +meshBB102: ; preds = %bb3, %entry + br i1 undef, label %bb5, label %bb + +meshBB106: ; preds = %skip_to_newline.exit26.fragment, %bb1.i23 + br i1 undef, label %bb, label %skip_to_newline.exit26 + +meshBB110: ; preds = %skip_to_newline.exit, %bb1.i23 + br i1 false, 
label %bb15, label %bb1.i23 + +meshBB114: ; preds = %bb7, %bb.nph + %meshStackVariable115.phi = phi i32 [ 19, %bb7 ], [ 8, %bb.nph ] ; <i32> [#uses=0] + br i1 undef, label %bb.nph.fragment, label %bb7.fragment + +meshBB118: ; preds = %meshBB74, %bb4, %bb.nph.fragment + %meshCmp121 = icmp eq i32 undef, 10 ; <i1> [#uses=1] + br i1 %meshCmp121, label %bb4.fragment, label %bb1.fragment + +meshBB122: ; preds = %bb13.fragment, %bb11.fragment + br i1 false, label %bb1.i, label %bb15 +} diff --git a/test/CodeGen/X86/2010-03-04-Mul8Bug.ll b/test/CodeGen/X86/2010-03-04-Mul8Bug.ll new file mode 100644 index 0000000000..48e75e9572 --- /dev/null +++ b/test/CodeGen/X86/2010-03-04-Mul8Bug.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s +; PR6489 +; +; This test case produces a MUL8 instruction and then tries to read the result +; from the AX register instead of AH/AL. That confuses live interval analysis. +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define void @func_56(i64 %p_57, i32*** %p_58) nounwind ssp { +for.end: + %conv49 = trunc i32 undef to i8 ; <i8> [#uses=1] + %div.i = udiv i8 %conv49, 5 ; <i8> [#uses=1] + %conv51 = zext i8 %div.i to i32 ; <i32> [#uses=1] + %call55 = call i32 @qux(i32 undef, i32 -2) nounwind ; <i32> [#uses=1] + %rem.i = urem i32 %call55, -1 ; <i32> [#uses=1] + %cmp57 = icmp uge i32 %conv51, %rem.i ; <i1> [#uses=1] + %conv58 = zext i1 %cmp57 to i32 ; <i32> [#uses=1] + %call85 = call i32 @func_35(i32*** undef, i32 undef, i32 %conv58, i32 1247, i32 0) nounwind ; <i32> [#uses=0] + ret void +} + +declare i32 @func_35(i32***, i32, i32, i32, i32) + +declare i32 @qux(i32, i32) diff --git a/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll b/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll new file mode 100644 index 0000000000..5de19662ff --- /dev/null +++ b/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll @@ 
-0,0 +1,42 @@ +; RUN: llc < %s -verify-machineinstrs +; +; When BRCOND is constant-folded to BR, make sure that PHI nodes don't get +; spurious operands when the CFG is trimmed. +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.2" + +define fastcc void @_ZSt16__introsort_loopIPdl17less_than_functorEvT_S2_T0_T1_(double* %__first, double* %__last, i64 %__depth_limit) nounwind ssp { +entry: + br i1 undef, label %bb1, label %bb2 + +bb1: ; preds = %entry + ret void + +bb2: ; preds = %entry + br label %bb2.outer.i + +bb2.outer.i: ; preds = %bb9.i, %bb2 + br i1 undef, label %bb1.i, label %bb5.preheader.i + +bb1.i: ; preds = %bb1.i, %bb2.outer.i + %indvar5.i = phi i64 [ %tmp, %bb1.i ], [ 0, %bb2.outer.i ] ; <i64> [#uses=1] + %tmp = add i64 %indvar5.i, 1 ; <i64> [#uses=2] + %scevgep.i = getelementptr double* undef, i64 %tmp ; <double*> [#uses=0] + br i1 undef, label %bb1.i, label %bb5.preheader.i + +bb5.preheader.i: ; preds = %bb1.i, %bb2.outer.i + br label %bb5.i + +bb5.i: ; preds = %bb5.i, %bb5.preheader.i + br i1 undef, label %bb5.i, label %bb7.i6 + +bb7.i6: ; preds = %bb5.i + br i1 undef, label %bb9.i, label %_ZSt21__unguarded_partitionIPdd17less_than_functorET_S2_S2_T0_T1_.exit + +bb9.i: ; preds = %bb7.i6 + br label %bb2.outer.i + +_ZSt21__unguarded_partitionIPdd17less_than_functorET_S2_S2_T0_T1_.exit: ; preds = %bb7.i6 + unreachable +} diff --git a/test/CodeGen/X86/2010-03-05-EFLAGS-Redef.ll b/test/CodeGen/X86/2010-03-05-EFLAGS-Redef.ll new file mode 100644 index 0000000000..3cca10e268 --- /dev/null +++ b/test/CodeGen/X86/2010-03-05-EFLAGS-Redef.ll @@ -0,0 +1,49 @@ +; RUN: llc < %s -verify-machineinstrs +; +; This test case is transformed into a single basic block by the machine +; branch folding pass. That makes a complete mess of the %EFLAGS liveness, but +; we don't care about liveness this late anyway. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.2" + +define i32 @main(i32 %argc, i8** nocapture %argv) ssp { +entry: + br i1 undef, label %bb, label %bb2 + +bb: ; preds = %entry + br label %bb2 + +bb2: ; preds = %bb, %entry + br i1 undef, label %bb3, label %bb5 + +bb3: ; preds = %bb2 + br label %bb5 + +bb5: ; preds = %bb3, %bb2 + br i1 undef, label %bb.nph239, label %bb8 + +bb.nph239: ; preds = %bb5 + unreachable + +bb8: ; preds = %bb5 + br i1 undef, label %bb.nph237, label %bb47 + +bb.nph237: ; preds = %bb8 + unreachable + +bb47: ; preds = %bb8 + br i1 undef, label %bb49, label %bb48 + +bb48: ; preds = %bb47 + unreachable + +bb49: ; preds = %bb47 + br i1 undef, label %bb51, label %bb50 + +bb50: ; preds = %bb49 + ret i32 0 + +bb51: ; preds = %bb49 + ret i32 0 +} diff --git a/test/CodeGen/X86/addr-label-difference.ll b/test/CodeGen/X86/addr-label-difference.ll index 547d6b5765..be0908aa1a 100644 --- a/test/CodeGen/X86/addr-label-difference.ll +++ b/test/CodeGen/X86/addr-label-difference.ll @@ -9,14 +9,18 @@ target triple = "i386-apple-darwin10.0" define void @test(i32 %i) nounwind ssp { entry: + call void @test(i32 1) br label %foo -foo: ; preds = %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto +foo: + call void @test(i32 1) br label %bar -bar: ; preds = %foo, %indirectgoto +bar: + call void @test(i32 1) br label %hack -hack: ; preds = %bar, %indirectgoto +hack: + call void @test(i32 1) ret void } diff --git a/test/CodeGen/X86/and-or-fold.ll b/test/CodeGen/X86/and-or-fold.ll index 7733b8a5ba..836b5f1551 100644 --- a/test/CodeGen/X86/and-or-fold.ll +++ b/test/CodeGen/X86/and-or-fold.ll @@ -1,14 +1,26 @@ -; RUN: llc < %s -march=x86 | grep and | count 1 +; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck -check-prefix=DARWIN %s +; RUN: opt < %s -O2 | llc -mtriple=x86_64-apple-darwin 
| FileCheck -check-prefix=DARWIN-OPT %s ; The dag combiner should fold together (x&127)|(y&16711680) -> (x|y)&c1 ; in this case. -define i32 @test6(i32 %x, i16 %y) { - %tmp1 = zext i16 %y to i32 ; <i32> [#uses=1] - %tmp2 = and i32 %tmp1, 127 ; <i32> [#uses=1] - %tmp4 = shl i32 %x, 16 ; <i32> [#uses=1] - %tmp5 = and i32 %tmp4, 16711680 ; <i32> [#uses=1] - %tmp6 = or i32 %tmp2, %tmp5 ; <i32> [#uses=1] - ret i32 %tmp6 +define i32 @test1(i32 %x, i16 %y) { + %tmp1 = zext i16 %y to i32 + %tmp2 = and i32 %tmp1, 127 + %tmp4 = shl i32 %x, 16 + %tmp5 = and i32 %tmp4, 16711680 + %tmp6 = or i32 %tmp2, %tmp5 + ret i32 %tmp6 +; DARWIN: andl $16711807, %eax } +; <rdar://problem/7529774> The optimizer shouldn't fold this into (and (or, C), D) +; if (C & D) == 0 +define i64 @test2(i64 %x) nounwind readnone ssp { +entry: + %tmp1 = and i64 %x, 123127 + %tmp2 = or i64 %tmp1, 3 + ret i64 %tmp2 +; DARWIN-OPT: andq $123124 +; DARWIN-OPT-NEXT: leaq 3 +} diff --git a/test/CodeGen/X86/bswap-inline-asm.ll b/test/CodeGen/X86/bswap-inline-asm.ll index 5bf58fa1d5..2b7019371a 100644 --- a/test/CodeGen/X86/bswap-inline-asm.ll +++ b/test/CodeGen/X86/bswap-inline-asm.ll @@ -1,17 +1,80 @@ ; RUN: llc < %s -march=x86-64 > %t ; RUN: not grep APP %t -; RUN: grep bswapq %t | count 2 -; RUN: grep bswapl %t | count 1 +; RUN: FileCheck %s < %t +; CHECK: foo: +; CHECK: bswapq define i64 @foo(i64 %x) nounwind { %asmtmp = tail call i64 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind ret i64 %asmtmp } + +; CHECK: bar: +; CHECK: bswapq define i64 @bar(i64 %x) nounwind { %asmtmp = tail call i64 asm "bswapq ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind ret i64 %asmtmp } + +; CHECK: pen: +; CHECK: bswapl define i32 @pen(i32 %x) nounwind { %asmtmp = tail call i32 asm "bswapl ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind ret i32 %asmtmp } + +; CHECK: s16: +; CHECK: rolw $8, +define zeroext i16 @s16(i16 zeroext %x) nounwind { + %asmtmp = tail call i16 asm "rorw $$8, 
${0:w}", "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i16 %x) nounwind + ret i16 %asmtmp +} + +; CHECK: t16: +; CHECK: rolw $8, +define zeroext i16 @t16(i16 zeroext %x) nounwind { + %asmtmp = tail call i16 asm "rorw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{fpsr},~{flags}"(i16 %x) nounwind + ret i16 %asmtmp +} + +; CHECK: u16: +; CHECK: rolw $8, +define zeroext i16 @u16(i16 zeroext %x) nounwind { + %asmtmp = tail call i16 asm "rolw $$8, ${0:w}", "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i16 %x) nounwind + ret i16 %asmtmp +} + +; CHECK: v16: +; CHECK: rolw $8, +define zeroext i16 @v16(i16 zeroext %x) nounwind { + %asmtmp = tail call i16 asm "rolw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{fpsr},~{flags}"(i16 %x) nounwind + ret i16 %asmtmp +} + +; CHECK: s32: +; CHECK: bswapl +define i32 @s32(i32 %x) nounwind { + %asmtmp = tail call i32 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind + ret i32 %asmtmp +} + +; CHECK: t32: +; CHECK: bswapl +define i32 @t32(i32 %x) nounwind { + %asmtmp = tail call i32 asm "bswap $0", "=r,0,~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind + ret i32 %asmtmp +} + +; CHECK: s64: +; CHECK: bswapq +define i64 @s64(i64 %x) nounwind { + %asmtmp = tail call i64 asm "bswap ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind + ret i64 %asmtmp +} + +; CHECK: t64: +; CHECK: bswapq +define i64 @t64(i64 %x) nounwind { + %asmtmp = tail call i64 asm "bswap ${0:q}", "=r,0,~{fpsr},~{dirflag},~{flags}"(i64 %x) nounwind + ret i64 %asmtmp +} diff --git a/test/CodeGen/X86/code_placement_eh.ll b/test/CodeGen/X86/code_placement_eh.ll new file mode 100644 index 0000000000..172d5910d0 --- /dev/null +++ b/test/CodeGen/X86/code_placement_eh.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s + +; CodePlacementOpt shouldn't try to modify this loop because +; it involves EH edges. 
+ +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin10.0" + +define void @foo() { +invcont5: + br label %bb15 + +.noexc3: ; preds = %bb15 + br i1 undef, label %bb18.i5.i, label %bb15 + +.noexc6.i.i: ; preds = %bb18.i5.i + %tmp2021 = invoke float @cosf(float 0.000000e+00) readonly + to label %bb18.i5.i unwind label %lpad.i.i ; <float> [#uses=0] + +bb18.i5.i: ; preds = %.noexc6.i.i, %bb51.i + %tmp2019 = invoke float @sinf(float 0.000000e+00) readonly + to label %.noexc6.i.i unwind label %lpad.i.i ; <float> [#uses=0] + +lpad.i.i: ; preds = %bb18.i5.i, %.noexc6.i.i + %eh_ptr.i.i = call i8* @llvm.eh.exception() ; <i8*> [#uses=1] + unreachable + +lpad59.i: ; preds = %bb15 + %eh_ptr60.i = call i8* @llvm.eh.exception() ; <i8*> [#uses=1] + unreachable + +bb15: ; preds = %.noexc3, %invcont5 + invoke fastcc void @_ZN28btHashedOverlappingPairCacheC2Ev() + to label %.noexc3 unwind label %lpad59.i +} + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare float @sinf(float) readonly + +declare float @cosf(float) readonly + +declare fastcc void @_ZN28btHashedOverlappingPairCacheC2Ev() align 2 diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll new file mode 100644 index 0000000000..1e13046f2a --- /dev/null +++ b/test/CodeGen/X86/crash.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=x86 %s -o - +; RUN: llc -march=x86-64 %s -o - + +; PR6497 + +; Chain and flag folding issues. 
+define i32 @test1() nounwind ssp { +entry: + %tmp5.i = volatile load i32* undef ; <i32> [#uses=1] + %conv.i = zext i32 %tmp5.i to i64 ; <i64> [#uses=1] + %tmp12.i = volatile load i32* undef ; <i32> [#uses=1] + %conv13.i = zext i32 %tmp12.i to i64 ; <i64> [#uses=1] + %shl.i = shl i64 %conv13.i, 32 ; <i64> [#uses=1] + %or.i = or i64 %shl.i, %conv.i ; <i64> [#uses=1] + %add16.i = add i64 %or.i, 256 ; <i64> [#uses=1] + %shr.i = lshr i64 %add16.i, 8 ; <i64> [#uses=1] + %conv19.i = trunc i64 %shr.i to i32 ; <i32> [#uses=1] + volatile store i32 %conv19.i, i32* undef + ret i32 undef +} diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll index 4fe554de75..f29cbf323e 100644 --- a/test/CodeGen/X86/critical-edge-split.ll +++ b/test/CodeGen/X86/critical-edge-split.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -tailcallopt=false -stats -info-output-file - | grep asm-printer | grep 31 +; RUN: llc < %s -mtriple=i386-apple-darwin -stats -info-output-file - | grep asm-printer | grep 29 %CC = type { %Register } %II = type { %"struct.XX::II::$_74" } diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll new file mode 100644 index 0000000000..2c699bfb0d --- /dev/null +++ b/test/CodeGen/X86/dllexport.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s | FileCheck %s +; PR2936 + +target triple = "i386-mingw32" + +define dllexport x86_fastcallcc i32 @foo() nounwind { +entry: + ret i32 0 +} + +; CHECK: .section .drectve +; CHECK: -export:@foo@0
\ No newline at end of file diff --git a/test/CodeGen/X86/fastcall-correct-mangling.ll b/test/CodeGen/X86/fastcall-correct-mangling.ll index 2b48f5f371..33b18bb8cc 100644 --- a/test/CodeGen/X86/fastcall-correct-mangling.ll +++ b/test/CodeGen/X86/fastcall-correct-mangling.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -mtriple=i386-unknown-mingw32 | \ -; RUN: grep {@12} +; RUN: llc < %s -mtriple=i386-unknown-mingw32 | FileCheck %s ; Check that a fastcall function gets correct mangling define x86_fastcallcc void @func(i64 %X, i8 %Y, i8 %G, i16 %Z) { +; CHECK: @func@20: ret void } diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll index 3bd58b65be..ff9b1b0b6a 100644 --- a/test/CodeGen/X86/full-lsr.ll +++ b/test/CodeGen/X86/full-lsr.ll @@ -1,12 +1,7 @@ ; RUN: llc < %s -march=x86 >%t -; TODO: Enhance full lsr mode to get this: -; RUNX: grep {addl \\\$4,} %t | count 3 -; RUNX: not grep {,%} %t - -; For now, it should find this, which is still pretty good: -; RUN: not grep {addl \\\$4,} %t -; RUN: grep {,%} %t | count 6 +; RUN: grep {addl \\\$4,} %t | count 3 +; RUN: not grep {,%} %t define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind { entry: diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll index 1a7b5777ae..d79c56bc46 100644 --- a/test/CodeGen/X86/global-sections.ll +++ b/test/CodeGen/X86/global-sections.ll @@ -100,7 +100,7 @@ @G8 = constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ] -; DARWIN: .section __TEXT,__ustring +; DARWIN: .section __TEXT,__const ; DARWIN: .globl _G8 ; DARWIN: _G8: @@ -110,7 +110,6 @@ @G9 = constant [4 x i32] [ i32 1, i32 2, i32 3, i32 0 ] -; DARWIN: .section __TEXT,__const ; DARWIN: .globl _G9 ; DARWIN: _G9: diff --git a/test/CodeGen/X86/ins_subreg_coalesce-3.ll b/test/CodeGen/X86/ins_subreg_coalesce-3.ll index 627edc51c1..8c1c409766 100644 --- a/test/CodeGen/X86/ins_subreg_coalesce-3.ll +++ b/test/CodeGen/X86/ins_subreg_coalesce-3.ll @@ -1,4 +1,4 @@ -; 
RUN: llc < %s -march=x86-64 | grep mov | count 5 +; RUN: llc < %s -march=x86-64 | grep mov | count 3 %struct.COMPOSITE = type { i8, i16, i16 } %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll index c695c29e06..408fb20b8d 100644 --- a/test/CodeGen/X86/iv-users-in-other-loops.ll +++ b/test/CodeGen/X86/iv-users-in-other-loops.ll @@ -1,11 +1,11 @@ ; RUN: llc < %s -march=x86-64 -o %t -; RUN: grep inc %t | count 1 +; RUN: not grep inc %t ; RUN: grep dec %t | count 2 ; RUN: grep addq %t | count 13 ; RUN: not grep addb %t -; RUN: grep leaq %t | count 9 -; RUN: grep leal %t | count 3 -; RUN: grep movq %t | count 5 +; RUN: not grep leaq %t +; RUN: not grep leal %t +; RUN: not grep movq %t ; IV users in each of the loops from other loops shouldn't cause LSR ; to insert new induction variables. Previously it would create a diff --git a/test/CodeGen/X86/licm-symbol.ll b/test/CodeGen/X86/licm-symbol.ll new file mode 100644 index 0000000000..d61bbfccbc --- /dev/null +++ b/test/CodeGen/X86/licm-symbol.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s | FileCheck %s + +; MachineLICM should be able to hoist the sF reference out of the loop. 
+ +; CHECK: pushl %esi +; CHECK: subl $8, %esp +; CHECK: movl $176, %esi +; CHECK: addl L___sF$non_lazy_ptr, %esi +; CHECK: .align 4, 0x90 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin8" + +%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } +%struct.__sFILEX = type opaque +%struct.__sbuf = type { i8*, i32 } +%struct.gcov_ctr_summary = type { i32, i32, i64, i64, i64 } +%struct.gcov_summary = type { i32, [1 x %struct.gcov_ctr_summary] } + +@__sF = external global [0 x %struct.FILE] ; <[0 x %struct.FILE]*> [#uses=1] + +declare i32 @fprintf(%struct.FILE* nocapture) nounwind + +define void @gcov_exit() nounwind { +entry: + br label %bb151 + +bb151: ; preds = %bb59, %bb56, %bb14 + br i1 undef, label %bb56, label %bb59 + +bb56: ; preds = %bb151 + %t0 = call i32 (%struct.FILE*)* @fprintf(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2)) nounwind + br label %bb151 + +bb59: ; preds = %bb151 + %t1 = call i32 (%struct.FILE*)* @fprintf(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2)) nounwind + br label %bb151 +} + diff --git a/test/CodeGen/X86/loop-strength-reduce-2.ll b/test/CodeGen/X86/loop-strength-reduce-2.ll index 30b5114349..b546462b68 100644 --- a/test/CodeGen/X86/loop-strength-reduce-2.ll +++ b/test/CodeGen/X86/loop-strength-reduce-2.ll @@ -1,11 +1,24 @@ -; RUN: llc < %s -march=x86 -relocation-model=pic | \ -; RUN: grep {, 4} | count 1 -; RUN: llc < %s -march=x86 | not grep lea +; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC +; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s -check-prefix=STATIC ; ; Make sure the common loop 
invariant A is hoisted up to preheader, ; since too many registers are needed to subsume it into the addressing modes. ; It's safe to sink A in when it's not pic. +; PIC: align +; PIC: movl $4, -4([[REG:%e[a-z]+]]) +; PIC: movl $5, ([[REG]]) +; PIC: addl $4, [[REG]] +; PIC: decl {{%e[[a-z]+}} +; PIC: jne + +; STATIC: align +; STATIC: movl $4, -4(%ecx) +; STATIC: movl $5, (%ecx) +; STATIC: addl $4, %ecx +; STATIC: decl %eax +; STATIC: jne + @A = global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2] define void @test(i32 %row, i32 %N.in) nounwind { diff --git a/test/CodeGen/X86/loop-strength-reduce-3.ll b/test/CodeGen/X86/loop-strength-reduce-3.ll index 70c91340c9..b1c9fb9c07 100644 --- a/test/CodeGen/X86/loop-strength-reduce-3.ll +++ b/test/CodeGen/X86/loop-strength-reduce-3.ll @@ -1,8 +1,11 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | \ -; RUN: grep {A+} | count 2 -; -; Make sure the common loop invariant A is not hoisted up to preheader, -; since it can be subsumed it into the addressing modes. +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s + +; CHECK: align +; CHECK: movl $4, -4(%ecx) +; CHECK: movl $5, (%ecx) +; CHECK: addl $4, %ecx +; CHECK: decl %eax +; CHECK: jne @A = global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2] diff --git a/test/CodeGen/X86/loop-strength-reduce.ll b/test/CodeGen/X86/loop-strength-reduce.ll index 4cb56ca9ed..42c6ac4983 100644 --- a/test/CodeGen/X86/loop-strength-reduce.ll +++ b/test/CodeGen/X86/loop-strength-reduce.ll @@ -1,8 +1,11 @@ -; RUN: llc < %s -march=x86 -relocation-model=static | \ -; RUN: grep {A+} | count 2 -; -; Make sure the common loop invariant A is not hoisted up to preheader, -; since it can be subsumed into the addressing mode in all uses. 
+; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s + +; CHECK: align +; CHECK: movl $4, -4(%ecx) +; CHECK: movl $5, (%ecx) +; CHECK: addl $4, %ecx +; CHECK: decl %eax +; CHECK: jne @A = internal global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2] diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll index 07e46eca75..6c0eb8c0df 100644 --- a/test/CodeGen/X86/loop-strength-reduce4.ll +++ b/test/CodeGen/X86/loop-strength-reduce4.ll @@ -1,5 +1,19 @@ -; RUN: llc < %s -march=x86 | grep cmp | grep 64 -; RUN: llc < %s -march=x86 | not grep inc +; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC +; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC + +; By starting the IV at -64 instead of 0, a cmp is eliminated, +; as the flags from the add can be used directly. + +; STATIC: movl $-64, %ecx + +; STATIC: movl %eax, _state+76(%ecx) +; STATIC: addl $16, %ecx +; STATIC: jne + +; In PIC mode the symbol can't be folded, so the change-compare-stride +; trick applies. 
+ +; PIC: cmpl $64 @state = external global [0 x i32] ; <[0 x i32]*> [#uses=4] @S = external global [0 x i32] ; <[0 x i32]*> [#uses=4] diff --git a/test/CodeGen/X86/loop-strength-reduce8.ll b/test/CodeGen/X86/loop-strength-reduce8.ll index e14cd8a99e..6b2247d1d6 100644 --- a/test/CodeGen/X86/loop-strength-reduce8.ll +++ b/test/CodeGen/X86/loop-strength-reduce8.ll @@ -1,4 +1,10 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin | grep leal | not grep 16 +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s + +; CHECK: leal 16(%eax), %edx +; CHECK: align +; CHECK: addl $4, %edx +; CHECK: decl %ecx +; CHECK: jne LBB1_2 %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 } %struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] } diff --git a/test/CodeGen/X86/lsr-overflow.ll b/test/CodeGen/X86/lsr-overflow.ll new file mode 100644 index 0000000000..0b0214c6d9 --- /dev/null +++ b/test/CodeGen/X86/lsr-overflow.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +; The comparison uses the pre-inc value, which could lead LSR to +; try to compute -INT64_MIN. 
+ +; CHECK: movabsq $-9223372036854775808, %rax +; CHECK: cmpq %rax, %rbx +; CHECK: sete %al + +declare i64 @bar() + +define i1 @foo() nounwind { +entry: + br label %for.cond.i + +for.cond.i: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.cond.i ] + %t = call i64 @bar() + %indvar.next = add i64 %indvar, 1 + %s = icmp ne i64 %indvar.next, %t + br i1 %s, label %for.cond.i, label %__ABContainsLabel.exit + +__ABContainsLabel.exit: + %cmp = icmp eq i64 %indvar, 9223372036854775807 + ret i1 %cmp +} diff --git a/test/CodeGen/X86/lsr-reuse-trunc.ll b/test/CodeGen/X86/lsr-reuse-trunc.ll new file mode 100644 index 0000000000..d1d714491f --- /dev/null +++ b/test/CodeGen/X86/lsr-reuse-trunc.ll @@ -0,0 +1,59 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +; Full strength reduction wouldn't reduce register pressure, so LSR should +; stick with indexing here. + +; CHECK: movaps (%rsi,%rax,4), %xmm3 +; CHECK: movaps %xmm3, (%rdi,%rax,4) +; CHECK: addq $4, %rax +; CHECK: cmpl %eax, (%rdx) +; CHECK-NEXT: jg + +define void @vvfloorf(float* nocapture %y, float* nocapture %x, i32* nocapture %n) nounwind { +entry: + %0 = load i32* %n, align 4 + %1 = icmp sgt i32 %0, 0 + br i1 %1, label %bb, label %return + +bb: + %indvar = phi i64 [ %indvar.next, %bb ], [ 0, %entry ] + %tmp = shl i64 %indvar, 2 + %scevgep = getelementptr float* %y, i64 %tmp + %scevgep9 = bitcast float* %scevgep to <4 x float>* + %scevgep10 = getelementptr float* %x, i64 %tmp + %scevgep1011 = bitcast float* %scevgep10 to <4 x float>* + %2 = load <4 x float>* %scevgep1011, align 16 + %3 = bitcast <4 x float> %2 to <4 x i32> + %4 = and <4 x i32> %3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> + %5 = bitcast <4 x i32> %4 to <4 x float> + %6 = and <4 x i32> %3, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> + %7 = tail call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %5, <4 x float> <float 8.388608e+06, float 8.388608e+06, float 8.388608e+06, float 8.388608e+06>, i8 5) 
nounwind + %tmp.i4 = bitcast <4 x float> %7 to <4 x i32> + %8 = xor <4 x i32> %tmp.i4, <i32 -1, i32 -1, i32 -1, i32 -1> + %9 = and <4 x i32> %8, <i32 1258291200, i32 1258291200, i32 1258291200, i32 1258291200> + %10 = or <4 x i32> %9, %6 + %11 = bitcast <4 x i32> %10 to <4 x float> + %12 = fadd <4 x float> %2, %11 + %13 = fsub <4 x float> %12, %11 + %14 = tail call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %2, <4 x float> %13, i8 1) nounwind + %15 = bitcast <4 x float> %14 to <4 x i32> + %16 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %15) nounwind readnone + %17 = fadd <4 x float> %13, %16 + %tmp.i = bitcast <4 x float> %17 to <4 x i32> + %18 = or <4 x i32> %tmp.i, %6 + %19 = bitcast <4 x i32> %18 to <4 x float> + store <4 x float> %19, <4 x float>* %scevgep9, align 16 + %tmp12 = add i64 %tmp, 4 + %tmp13 = trunc i64 %tmp12 to i32 + %20 = load i32* %n, align 4 + %21 = icmp sgt i32 %20, %tmp13 + %indvar.next = add i64 %indvar, 1 + br i1 %21, label %bb, label %return + +return: + ret void +} + +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone + +declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll new file mode 100644 index 0000000000..2f6fb3fa8b --- /dev/null +++ b/test/CodeGen/X86/lsr-reuse.ll @@ -0,0 +1,442 @@ +; RUN: llc < %s -march=x86-64 -O3 -asm-verbose=false | FileCheck %s +target datalayout = "e-p:64:64:64" +target triple = "x86_64-unknown-unknown" + +; Full strength reduction reduces register pressure from 5 to 4 here. +; Instruction selection should use the FLAGS value from the dec for +; the branch. Scheduling should push the adds upwards. 
+ +; CHECK: full_me_0: +; CHECK: movsd (%rsi), %xmm0 +; CHECK: addq $8, %rsi +; CHECK: mulsd (%rdx), %xmm0 +; CHECK: addq $8, %rdx +; CHECK: movsd %xmm0, (%rdi) +; CHECK: addq $8, %rdi +; CHECK: decq %rcx +; CHECK: jne + +define void @full_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; Mostly-full strength reduction means we do full strength reduction on all +; except for the offsets. +; +; Given a choice between constant offsets -2048 and 2048, choose the negative +; value, because at boundary conditions it has a smaller encoding. +; TODO: That's an over-general heuristic. It would be better for the target +; to indicate what the encoding cost would be. Then using a 2048 offset +; would be better on x86-64, since the start value would be 0 instead of +; 2048. 
+ +; CHECK: mostly_full_me_0: +; CHECK: movsd -2048(%rsi), %xmm0 +; CHECK: mulsd -2048(%rdx), %xmm0 +; CHECK: movsd %xmm0, -2048(%rdi) +; CHECK: movsd (%rsi), %xmm0 +; CHECK: addq $8, %rsi +; CHECK: divsd (%rdx), %xmm0 +; CHECK: addq $8, %rdx +; CHECK: movsd %xmm0, (%rdi) +; CHECK: addq $8, %rdi +; CHECK: decq %rcx +; CHECK: jne + +define void @mostly_full_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %j = add i64 %i, 256 + %Aj = getelementptr inbounds double* %A, i64 %j + %Bj = getelementptr inbounds double* %B, i64 %j + %Cj = getelementptr inbounds double* %C, i64 %j + %t3 = load double* %Bj + %t4 = load double* %Cj + %o = fdiv double %t3, %t4 + store double %o, double* %Aj + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; A minor variation on mostly_full_me_0. +; Prefer to start the indvar at 0. 
+ +; CHECK: mostly_full_me_1: +; CHECK: movsd (%rsi), %xmm0 +; CHECK: mulsd (%rdx), %xmm0 +; CHECK: movsd %xmm0, (%rdi) +; CHECK: movsd -2048(%rsi), %xmm0 +; CHECK: addq $8, %rsi +; CHECK: divsd -2048(%rdx), %xmm0 +; CHECK: addq $8, %rdx +; CHECK: movsd %xmm0, -2048(%rdi) +; CHECK: addq $8, %rdi +; CHECK: decq %rcx +; CHECK: jne + +define void @mostly_full_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %j = sub i64 %i, 256 + %Aj = getelementptr inbounds double* %A, i64 %j + %Bj = getelementptr inbounds double* %B, i64 %j + %Cj = getelementptr inbounds double* %C, i64 %j + %t3 = load double* %Bj + %t4 = load double* %Cj + %o = fdiv double %t3, %t4 + store double %o, double* %Aj + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; A slightly less minor variation on mostly_full_me_0. 
+ +; CHECK: mostly_full_me_2: +; CHECK: movsd (%rsi), %xmm0 +; CHECK: mulsd (%rdx), %xmm0 +; CHECK: movsd %xmm0, (%rdi) +; CHECK: movsd -4096(%rsi), %xmm0 +; CHECK: addq $8, %rsi +; CHECK: divsd -4096(%rdx), %xmm0 +; CHECK: addq $8, %rdx +; CHECK: movsd %xmm0, -4096(%rdi) +; CHECK: addq $8, %rdi +; CHECK: decq %rcx +; CHECK: jne + +define void @mostly_full_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %k = add i64 %i, 256 + %Ak = getelementptr inbounds double* %A, i64 %k + %Bk = getelementptr inbounds double* %B, i64 %k + %Ck = getelementptr inbounds double* %C, i64 %k + %t1 = load double* %Bk + %t2 = load double* %Ck + %m = fmul double %t1, %t2 + store double %m, double* %Ak + %j = sub i64 %i, 256 + %Aj = getelementptr inbounds double* %A, i64 %j + %Bj = getelementptr inbounds double* %B, i64 %j + %Cj = getelementptr inbounds double* %C, i64 %j + %t3 = load double* %Bj + %t4 = load double* %Cj + %o = fdiv double %t3, %t4 + store double %o, double* %Aj + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; In this test, the counting IV exit value is used, so full strength reduction +; would not reduce register pressure. IndVarSimplify ought to simplify such +; cases away, but it's useful here to verify that LSR's register pressure +; heuristics are working as expected. 
+ +; CHECK: count_me_0: +; CHECK: movsd (%rsi,%rax,8), %xmm0 +; CHECK: mulsd (%rdx,%rax,8), %xmm0 +; CHECK: movsd %xmm0, (%rdi,%rax,8) +; CHECK: incq %rax +; CHECK: cmpq %rax, %rcx +; CHECK: jne + +define i64 @count_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + %q = phi i64 [ 0, %entry ], [ %i.next, %loop ] + ret i64 %q +} + +; In this test, the trip count value is used, so full strength reduction +; would not reduce register pressure. +; (though it would reduce register pressure inside the loop...) 
+ +; CHECK: count_me_1: +; CHECK: movsd (%rsi,%rax,8), %xmm0 +; CHECK: mulsd (%rdx,%rax,8), %xmm0 +; CHECK: movsd %xmm0, (%rdi,%rax,8) +; CHECK: incq %rax +; CHECK: cmpq %rax, %rcx +; CHECK: jne + +define i64 @count_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + %q = phi i64 [ 0, %entry ], [ %n, %loop ] + ret i64 %q +} + +; Full strength reduction doesn't save any registers here because the +; loop tripcount is a constant. + +; CHECK: count_me_2: +; CHECK: movl $10, %eax +; CHECK: align +; CHECK: BB7_1: +; CHECK: movsd -40(%rdi,%rax,8), %xmm0 +; CHECK: addsd -40(%rsi,%rax,8), %xmm0 +; CHECK: movsd %xmm0, -40(%rdx,%rax,8) +; CHECK: movsd (%rdi,%rax,8), %xmm0 +; CHECK: subsd (%rsi,%rax,8), %xmm0 +; CHECK: movsd %xmm0, (%rdx,%rax,8) +; CHECK: incq %rax +; CHECK: cmpq $5010, %rax +; CHECK: jne + +define void @count_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C) nounwind { +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop ] + %i5 = add i64 %i, 5 + %Ai = getelementptr double* %A, i64 %i5 + %t2 = load double* %Ai + %Bi = getelementptr double* %B, i64 %i5 + %t4 = load double* %Bi + %t5 = fadd double %t2, %t4 + %Ci = getelementptr double* %C, i64 %i5 + store double %t5, double* %Ci + %i10 = add i64 %i, 10 + %Ai10 = getelementptr double* %A, i64 %i10 + %t9 = load double* %Ai10 + %Bi10 = getelementptr double* %B, i64 %i10 + %t11 = load double* %Bi10 + %t12 = fsub double 
%t9, %t11 + %Ci10 = getelementptr double* %C, i64 %i10 + store double %t12, double* %Ci10 + %i.next = add i64 %i, 1 + %exitcond = icmp eq i64 %i.next, 5000 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; This should be fully strength-reduced to reduce register pressure. + +; CHECK: full_me_1: +; CHECK: align +; CHECK: BB8_1: +; CHECK: movsd (%rdi), %xmm0 +; CHECK: addsd (%rsi), %xmm0 +; CHECK: movsd %xmm0, (%rdx) +; CHECK: movsd 40(%rdi), %xmm0 +; CHECK: addq $8, %rdi +; CHECK: subsd 40(%rsi), %xmm0 +; CHECK: addq $8, %rsi +; CHECK: movsd %xmm0, 40(%rdx) +; CHECK: addq $8, %rdx +; CHECK: decq %rcx +; CHECK: jne + +define void @full_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop ] + %i5 = add i64 %i, 5 + %Ai = getelementptr double* %A, i64 %i5 + %t2 = load double* %Ai + %Bi = getelementptr double* %B, i64 %i5 + %t4 = load double* %Bi + %t5 = fadd double %t2, %t4 + %Ci = getelementptr double* %C, i64 %i5 + store double %t5, double* %Ci + %i10 = add i64 %i, 10 + %Ai10 = getelementptr double* %A, i64 %i10 + %t9 = load double* %Ai10 + %Bi10 = getelementptr double* %B, i64 %i10 + %t11 = load double* %Bi10 + %t12 = fsub double %t9, %t11 + %Ci10 = getelementptr double* %C, i64 %i10 + store double %t12, double* %Ci10 + %i.next = add i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; This is a variation on full_me_0 in which the 0,+,1 induction variable +; has a non-address use, pinning that value in a register. 
+ +; CHECK: count_me_3: +; CHECK: call +; CHECK: movsd (%r15,%r13,8), %xmm0 +; CHECK: mulsd (%r14,%r13,8), %xmm0 +; CHECK: movsd %xmm0, (%r12,%r13,8) +; CHECK: incq %r13 +; CHECK: cmpq %r13, %rbx +; CHECK: jne + +declare void @use(i64) + +define void @count_me_3(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + call void @use(i64 %i) + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; LSR should use only one indvar for the inner loop. +; rdar://7657764 + +; CHECK: asd: +; CHECK: BB10_5: +; CHECK-NEXT: addl (%r{{[^,]*}},%rdi,4), %e +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: cmpq %rdi, %r{{[^,]*}} +; CHECK-NEXT: jg + +%struct.anon = type { i32, [4200 x i32] } + +@bars = common global [123123 x %struct.anon] zeroinitializer, align 32 ; <[123123 x %struct.anon]*> [#uses=2] + +define i32 @asd(i32 %n) nounwind readonly { +entry: + %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1] + br i1 %0, label %bb.nph14, label %bb5 + +bb.nph14: ; preds = %entry + %tmp18 = zext i32 %n to i64 ; <i64> [#uses=1] + br label %bb + +bb: ; preds = %bb3, %bb.nph14 + %indvar16 = phi i64 [ 0, %bb.nph14 ], [ %indvar.next17, %bb3 ] ; <i64> [#uses=3] + %s.113 = phi i32 [ 0, %bb.nph14 ], [ %s.0.lcssa, %bb3 ] ; <i32> [#uses=2] + %scevgep2526 = getelementptr [123123 x %struct.anon]* @bars, i64 0, i64 %indvar16, i32 0 ; <i32*> [#uses=1] + %1 = load i32* %scevgep2526, align 4 ; <i32> [#uses=2] + %2 = icmp sgt i32 %1, 0 ; <i1> [#uses=1] + br i1 %2, label %bb.nph, label %bb3 + +bb.nph: ; preds = %bb + 
%tmp23 = sext i32 %1 to i64 ; <i64> [#uses=1] + br label %bb1 + +bb1: ; preds = %bb.nph, %bb1 + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp19, %bb1 ] ; <i64> [#uses=2] + %s.07 = phi i32 [ %s.113, %bb.nph ], [ %4, %bb1 ] ; <i32> [#uses=1] + %c.08 = getelementptr [123123 x %struct.anon]* @bars, i64 0, i64 %indvar16, i32 1, i64 %indvar ; <i32*> [#uses=1] + %3 = load i32* %c.08, align 4 ; <i32> [#uses=1] + %4 = add nsw i32 %3, %s.07 ; <i32> [#uses=2] + %tmp19 = add i64 %indvar, 1 ; <i64> [#uses=2] + %5 = icmp sgt i64 %tmp23, %tmp19 ; <i1> [#uses=1] + br i1 %5, label %bb1, label %bb3 + +bb3: ; preds = %bb1, %bb + %s.0.lcssa = phi i32 [ %s.113, %bb ], [ %4, %bb1 ] ; <i32> [#uses=2] + %indvar.next17 = add i64 %indvar16, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next17, %tmp18 ; <i1> [#uses=1] + br i1 %exitcond, label %bb5, label %bb + +bb5: ; preds = %bb3, %entry + %s.1.lcssa = phi i32 [ 0, %entry ], [ %s.0.lcssa, %bb3 ] ; <i32> [#uses=1] + ret i32 %s.1.lcssa +} diff --git a/test/CodeGen/X86/lsr-wrap.ll b/test/CodeGen/X86/lsr-wrap.ll new file mode 100644 index 0000000000..ec8db501ef --- /dev/null +++ b/test/CodeGen/X86/lsr-wrap.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +; LSR would like to use a single IV for both of these, however it's +; not safe due to wraparound. 
+ +; CHECK: addb $-4, %r +; CHECK: decw % + +@g_19 = common global i32 0 ; <i32*> [#uses=2] + +declare i32 @func_8(i8 zeroext) nounwind + +declare i32 @func_3(i8 signext) nounwind + +define void @func_1() nounwind { +entry: + br label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i16 [ 0, %entry ], [ %indvar.next, %bb ] ; <i16> [#uses=2] + %tmp = sub i16 0, %indvar ; <i16> [#uses=1] + %tmp27 = trunc i16 %tmp to i8 ; <i8> [#uses=1] + %tmp1 = load i32* @g_19, align 4 ; <i32> [#uses=2] + %tmp2 = add i32 %tmp1, 1 ; <i32> [#uses=1] + store i32 %tmp2, i32* @g_19, align 4 + %tmp3 = trunc i32 %tmp1 to i8 ; <i8> [#uses=1] + %tmp4 = tail call i32 @func_8(i8 zeroext %tmp3) nounwind ; <i32> [#uses=0] + %tmp5 = shl i8 %tmp27, 2 ; <i8> [#uses=1] + %tmp6 = add i8 %tmp5, -112 ; <i8> [#uses=1] + %tmp7 = tail call i32 @func_3(i8 signext %tmp6) nounwind ; <i32> [#uses=0] + %indvar.next = add i16 %indvar, 1 ; <i16> [#uses=2] + %exitcond = icmp eq i16 %indvar.next, -28 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb + ret void +} diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll index bc493bd8f7..0b4d73a683 100644 --- a/test/CodeGen/X86/masked-iv-safe.ll +++ b/test/CodeGen/X86/masked-iv-safe.ll @@ -169,7 +169,7 @@ loop: %indvar.i24 = and i64 %indvar, 16777215 %t3 = getelementptr double* %d, i64 %indvar.i24 %t4 = load double* %t3 - %t5 = fmul double %t4, 2.3 + %t5 = fdiv double %t4, 2.3 store double %t5, double* %t3 %t6 = getelementptr double* %d, i64 %indvar %t7 = load double* %t6 @@ -199,7 +199,7 @@ loop: %indvar.i24 = ashr i64 %s1, 24 %t3 = getelementptr double* %d, i64 %indvar.i24 %t4 = load double* %t3 - %t5 = fmul double %t4, 2.3 + %t5 = fdiv double %t4, 2.3 store double %t5, double* %t3 %t6 = getelementptr double* %d, i64 %indvar %t7 = load double* %t6 @@ -229,7 +229,7 @@ loop: %indvar.i24 = ashr i64 %s1, 24 %t3 = getelementptr double* %d, i64 %indvar.i24 %t4 = load double* %t3 - %t5 = fmul double %t4, 
2.3 + %t5 = fdiv double %t4, 2.3 store double %t5, double* %t3 %t6 = getelementptr double* %d, i64 %indvar %t7 = load double* %t6 diff --git a/test/CodeGen/X86/omit-label.ll b/test/CodeGen/X86/omit-label.ll deleted file mode 100644 index 0ec03ebace..0000000000 --- a/test/CodeGen/X86/omit-label.ll +++ /dev/null @@ -1,57 +0,0 @@ -; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-linux-gnu | FileCheck %s -; PR4126 -; PR4732 - -; Don't omit these labels' definitions. - -; CHECK: bux: -; CHECK: LBB1_1: - -define void @bux(i32 %p_53) nounwind optsize { -entry: - %0 = icmp eq i32 %p_53, 0 ; <i1> [#uses=1] - %1 = icmp sgt i32 %p_53, 0 ; <i1> [#uses=1] - %or.cond = and i1 %0, %1 ; <i1> [#uses=1] - br i1 %or.cond, label %bb.i, label %bb3 - -bb.i: ; preds = %entry - %2 = add i32 %p_53, 1 ; <i32> [#uses=1] - %3 = icmp slt i32 %2, 0 ; <i1> [#uses=0] - br label %bb3 - -bb3: ; preds = %bb.i, %entry - %4 = tail call i32 (...)* @baz(i32 0) nounwind ; <i32> [#uses=0] - ret void -} - -declare i32 @baz(...) - -; Don't omit this label in the assembly output. 
-; CHECK: int321: -; CHECK: LBB2_1 -; CHECK: LBB2_1 -; CHECK: LBB2_1: - -define void @int321(i8 signext %p_103, i32 %uint8p_104) nounwind readnone { -entry: - %tobool = icmp eq i8 %p_103, 0 ; <i1> [#uses=1] - %cmp.i = icmp sgt i8 %p_103, 0 ; <i1> [#uses=1] - %or.cond = and i1 %tobool, %cmp.i ; <i1> [#uses=1] - br i1 %or.cond, label %land.end.i, label %for.cond.preheader - -land.end.i: ; preds = %entry - %conv3.i = sext i8 %p_103 to i32 ; <i32> [#uses=1] - %div.i = sdiv i32 1, %conv3.i ; <i32> [#uses=1] - %tobool.i = icmp eq i32 %div.i, -2147483647 ; <i1> [#uses=0] - br label %for.cond.preheader - -for.cond.preheader: ; preds = %land.end.i, %entry - %cmp = icmp sgt i8 %p_103, 1 ; <i1> [#uses=1] - br i1 %cmp, label %for.end.split, label %for.cond - -for.cond: ; preds = %for.cond.preheader, %for.cond - br label %for.cond - -for.end.split: ; preds = %for.cond.preheader - ret void -} diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll index 12736cda4c..6a08dae51f 100644 --- a/test/CodeGen/X86/pr1505b.ll +++ b/test/CodeGen/X86/pr1505b.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mcpu=i486 | grep fstpl | count 4 -; RUN: llc < %s -mcpu=i486 | grep fstps | count 3 +; RUN: llc < %s -mcpu=i486 | grep fstpl | count 5 +; RUN: llc < %s -mcpu=i486 | grep fstps | count 2 ; PR1505 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" diff --git a/test/CodeGen/X86/pr3495-2.ll b/test/CodeGen/X86/pr3495-2.ll index 71aa5a0488..98c064a07d 100644 --- a/test/CodeGen/X86/pr3495-2.ll +++ b/test/CodeGen/X86/pr3495-2.ll @@ -1,4 +1,8 @@ -; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of reloads omited} +; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of loads added} | grep 1 +; PR3495 +; +; This test may not be testing what it was supposed to test. 
+; It used to have two spills and four reloads, but not it only has one spill and one reload. target datalayout = "e-p:32:32:32" target triple = "i386-apple-darwin9.6" diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll index 1795970d35..e84a84f59b 100644 --- a/test/CodeGen/X86/pr3495.ll +++ b/test/CodeGen/X86/pr3495.ll @@ -1,8 +1,7 @@ ; RUN: llc < %s -march=x86 -stats |& grep {Number of loads added} | grep 2 ; RUN: llc < %s -march=x86 -stats |& grep {Number of register spills} | grep 1 -; RUN: llc < %s -march=x86 -stats |& grep {Number of machine instrs printed} | grep 38 +; RUN: llc < %s -march=x86 -stats |& grep {Number of machine instrs printed} | grep 34 ; PR3495 -; The loop reversal kicks in once here, resulting in one fewer instruction. target triple = "i386-pc-linux-gnu" @x = external global [8 x i32], align 32 ; <[8 x i32]*> [#uses=1] diff --git a/test/CodeGen/X86/pre-split8.ll b/test/CodeGen/X86/pre-split8.ll index ea4b9496b3..0684bd036c 100644 --- a/test/CodeGen/X86/pre-split8.ll +++ b/test/CodeGen/X86/pre-split8.ll @@ -20,7 +20,7 @@ bb: ; preds = %bb9.i, %entry bb9.i: ; preds = %bb %2 = fsub double %.rle4, %0 ; <double> [#uses=0] - %3 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=0] + %3 = tail call double @asin(double %.rle4) nounwind readonly ; <double> [#uses=0] %4 = fmul double 0.000000e+00, %0 ; <double> [#uses=1] %5 = tail call double @tan(double 0.000000e+00) nounwind readonly ; <double> [#uses=0] %6 = fmul double %4, 0.000000e+00 ; <double> [#uses=1] diff --git a/test/CodeGen/X86/pre-split9.ll b/test/CodeGen/X86/pre-split9.ll index c27d925d43..86dda33533 100644 --- a/test/CodeGen/X86/pre-split9.ll +++ b/test/CodeGen/X86/pre-split9.ll @@ -22,7 +22,7 @@ bb: ; preds = %bb9.i, %entry bb9.i: ; preds = %bb %2 = fsub double %.rle4, %0 ; <double> [#uses=0] - %3 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=0] + %3 = tail call double @asin(double %.rle4) nounwind 
readonly ; <double> [#uses=0] %4 = tail call double @sin(double 0.000000e+00) nounwind readonly ; <double> [#uses=1] %5 = fmul double %4, %0 ; <double> [#uses=1] %6 = tail call double @tan(double 0.000000e+00) nounwind readonly ; <double> [#uses=0] diff --git a/test/CodeGen/X86/ptrtoint-constexpr.ll b/test/CodeGen/X86/ptrtoint-constexpr.ll index dd9790568a..d1cb34bec8 100644 --- a/test/CodeGen/X86/ptrtoint-constexpr.ll +++ b/test/CodeGen/X86/ptrtoint-constexpr.ll @@ -9,6 +9,6 @@ ; CHECK: .globl x ; CHECK: x: -; CHECK: .quad 3 +; CHECK: .quad ((0+1)&4294967295)*3 @x = global i64 mul (i64 3, i64 ptrtoint (i2* getelementptr (i2* null, i64 1) to i64)) diff --git a/test/CodeGen/X86/scalar_widen_div.ll b/test/CodeGen/X86/scalar_widen_div.ll index fc67e4417c..77f320f105 100644 --- a/test/CodeGen/X86/scalar_widen_div.ll +++ b/test/CodeGen/X86/scalar_widen_div.ll @@ -152,3 +152,32 @@ define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) { %rem.r = urem <5 x i64> %num, %rem ret <5 x i64> %rem.r } + +define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) { +; CHECK: idivl +; CHECK: idivl +; CHECK: idivl +; CHECK-NOT: idivl +; CHECK: ret +entry: + %cmp13 = icmp sgt i32 %n, 0 + br i1 %cmp13, label %bb.nph, label %for.end + +bb.nph: + br label %for.body + +for.body: + %i.014 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ] + %arrayidx11 = getelementptr <3 x i32>* %dest, i32 %i.014 + %tmp4 = load <3 x i32>* %arrayidx11 ; <<3 x i32>> [#uses=1] + %arrayidx7 = getelementptr inbounds <3 x i32>* %old, i32 %i.014 + %tmp8 = load <3 x i32>* %arrayidx7 ; <<3 x i32>> [#uses=1] + %div = sdiv <3 x i32> %tmp4, %tmp8 + store <3 x i32> %div, <3 x i32>* %arrayidx11 + %inc = add nsw i32 %i.014, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll index 17ffb5e464..19fbed015b 100644 --- 
a/test/CodeGen/X86/sse-minmax.ll +++ b/test/CodeGen/X86/sse-minmax.ll @@ -1,17 +1,26 @@ ; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s +; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math | FileCheck -check-prefix=UNSAFE %s +; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s ; Some of these patterns can be matched as SSE min or max. Some of ; then can be matched provided that the operands are swapped. ; Some of them can't be matched at all and require a comparison ; and a conditional branch. -; The naming convention is {,x_}{o,u}{gt,lt,ge,le}{,_inverse} +; The naming convention is {,x_,y_}{o,u}{gt,lt,ge,le}{,_inverse} ; x_ : use 0.0 instead of %y +; y_ : use -0.0 instead of %y ; _inverse : swap the arms of the select. ; CHECK: ogt: ; CHECK-NEXT: maxsd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: ogt: +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ogt: +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ogt(double %x, double %y) nounwind { %c = fcmp ogt double %x, %y %d = select i1 %c, double %x, double %y @@ -21,6 +30,12 @@ define double @ogt(double %x, double %y) nounwind { ; CHECK: olt: ; CHECK-NEXT: minsd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: olt: +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: olt: +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @olt(double %x, double %y) nounwind { %c = fcmp olt double %x, %y %d = select i1 %c, double %x, double %y @@ -31,6 +46,14 @@ define double @olt(double %x, double %y) nounwind { ; CHECK-NEXT: minsd %xmm0, %xmm1 ; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: ogt_inverse: +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ogt_inverse: +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ogt_inverse(double %x, double %y) nounwind { 
%c = fcmp ogt double %x, %y %d = select i1 %c, double %y, double %x @@ -41,6 +64,14 @@ define double @ogt_inverse(double %x, double %y) nounwind { ; CHECK-NEXT: maxsd %xmm0, %xmm1 ; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: olt_inverse: +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: olt_inverse: +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @olt_inverse(double %x, double %y) nounwind { %c = fcmp olt double %x, %y %d = select i1 %c, double %y, double %x @@ -49,6 +80,12 @@ define double @olt_inverse(double %x, double %y) nounwind { ; CHECK: oge: ; CHECK-NEXT: ucomisd %xmm1, %xmm0 +; UNSAFE: oge: +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: oge: +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @oge(double %x, double %y) nounwind { %c = fcmp oge double %x, %y %d = select i1 %c, double %x, double %y @@ -57,6 +94,10 @@ define double @oge(double %x, double %y) nounwind { ; CHECK: ole: ; CHECK-NEXT: ucomisd %xmm0, %xmm1 +; UNSAFE: ole: +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; FINITE: ole: +; FINITE-NEXT: minsd %xmm1, %xmm0 define double @ole(double %x, double %y) nounwind { %c = fcmp ole double %x, %y %d = select i1 %c, double %x, double %y @@ -65,6 +106,14 @@ define double @ole(double %x, double %y) nounwind { ; CHECK: oge_inverse: ; CHECK-NEXT: ucomisd %xmm1, %xmm0 +; UNSAFE: oge_inverse: +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: oge_inverse: +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @oge_inverse(double %x, double %y) nounwind { %c = fcmp oge double %x, %y %d = select i1 %c, double %y, double %x @@ -73,6 +122,14 @@ define double @oge_inverse(double %x, double %y) nounwind { ; CHECK: ole_inverse: ; CHECK-NEXT: ucomisd %xmm0, %xmm1 +; UNSAFE: ole_inverse: +; UNSAFE-NEXT: maxsd %xmm0, 
%xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ole_inverse: +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ole_inverse(double %x, double %y) nounwind { %c = fcmp ole double %x, %y %d = select i1 %c, double %y, double %x @@ -83,6 +140,14 @@ define double @ole_inverse(double %x, double %y) nounwind { ; CHECK-NEXT: pxor %xmm1, %xmm1 ; CHECK-NEXT: maxsd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: x_ogt: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ogt: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ogt(double %x) nounwind { %c = fcmp ogt double %x, 0.000000e+00 %d = select i1 %c, double %x, double 0.000000e+00 @@ -93,6 +158,14 @@ define double @x_ogt(double %x) nounwind { ; CHECK-NEXT: pxor %xmm1, %xmm1 ; CHECK-NEXT: minsd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: x_olt: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_olt: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_olt(double %x) nounwind { %c = fcmp olt double %x, 0.000000e+00 %d = select i1 %c, double %x, double 0.000000e+00 @@ -104,6 +177,16 @@ define double @x_olt(double %x) nounwind { ; CHECK-NEXT: minsd %xmm0, %xmm1 ; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: x_ogt_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ogt_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ogt_inverse(double %x) nounwind { %c = fcmp ogt double %x, 0.000000e+00 %d = select i1 %c, double 0.000000e+00, double %x @@ -115,6 +198,16 @@ define double @x_ogt_inverse(double %x) nounwind { ; CHECK-NEXT: maxsd %xmm0, %xmm1 
; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: x_olt_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_olt_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_olt_inverse(double %x) nounwind { %c = fcmp olt double %x, 0.000000e+00 %d = select i1 %c, double 0.000000e+00, double %x @@ -122,9 +215,15 @@ define double @x_olt_inverse(double %x) nounwind { } ; CHECK: x_oge: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: maxsd %xmm1, %xmm0 -; CHECK-NEXT: ret +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: x_oge: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_oge: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_oge(double %x) nounwind { %c = fcmp oge double %x, 0.000000e+00 %d = select i1 %c, double %x, double 0.000000e+00 @@ -132,9 +231,15 @@ define double @x_oge(double %x) nounwind { } ; CHECK: x_ole: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: minsd %xmm1, %xmm0 -; CHECK-NEXT: ret +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: x_ole: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ole: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ole(double %x) nounwind { %c = fcmp ole double %x, 0.000000e+00 %d = select i1 %c, double %x, double 0.000000e+00 @@ -142,10 +247,17 @@ define double @x_ole(double %x) nounwind { } ; CHECK: x_oge_inverse: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: minsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: ret +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: x_oge_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: 
x_oge_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_oge_inverse(double %x) nounwind { %c = fcmp oge double %x, 0.000000e+00 %d = select i1 %c, double 0.000000e+00, double %x @@ -153,10 +265,17 @@ define double @x_oge_inverse(double %x) nounwind { } ; CHECK: x_ole_inverse: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: maxsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: ret +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: x_ole_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ole_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ole_inverse(double %x) nounwind { %c = fcmp ole double %x, 0.000000e+00 %d = select i1 %c, double 0.000000e+00, double %x @@ -164,7 +283,13 @@ define double @x_ole_inverse(double %x) nounwind { } ; CHECK: ugt: -; CHECK-NEXT: ucomisd %xmm0, %xmm1 +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: ugt: +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ugt: +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ugt(double %x, double %y) nounwind { %c = fcmp ugt double %x, %y %d = select i1 %c, double %x, double %y @@ -172,7 +297,13 @@ define double @ugt(double %x, double %y) nounwind { } ; CHECK: ult: -; CHECK-NEXT: ucomisd %xmm1, %xmm0 +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: ult: +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ult: +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ult(double %x, double %y) nounwind { %c = fcmp ult double %x, %y %d = select i1 %c, double %x, double %y @@ -180,7 +311,15 @@ define double @ult(double %x, double %y) nounwind { } ; CHECK: ugt_inverse: -; CHECK-NEXT: ucomisd %xmm0, %xmm1 +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: ugt_inverse: +; 
UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ugt_inverse: +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ugt_inverse(double %x, double %y) nounwind { %c = fcmp ugt double %x, %y %d = select i1 %c, double %y, double %x @@ -188,7 +327,15 @@ define double @ugt_inverse(double %x, double %y) nounwind { } ; CHECK: ult_inverse: -; CHECK-NEXT: ucomisd %xmm1, %xmm0 +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: ult_inverse: +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ult_inverse: +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ult_inverse(double %x, double %y) nounwind { %c = fcmp ult double %x, %y %d = select i1 %c, double %y, double %x @@ -196,9 +343,15 @@ define double @ult_inverse(double %x, double %y) nounwind { } ; CHECK: uge: -; CHECK-NEXT: maxsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: uge: +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: uge: +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @uge(double %x, double %y) nounwind { %c = fcmp uge double %x, %y %d = select i1 %c, double %x, double %y @@ -209,6 +362,12 @@ define double @uge(double %x, double %y) nounwind { ; CHECK-NEXT: minsd %xmm0, %xmm1 ; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: ule: +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ule: +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ule(double %x, double %y) nounwind { %c = fcmp ule double %x, %y %d = select i1 %c, double %x, double %y @@ -218,6 +377,14 @@ define double @ule(double %x, double %y) nounwind { ; CHECK: uge_inverse: ; CHECK-NEXT: minsd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: uge_inverse: +; UNSAFE-NEXT: minsd %xmm0, 
%xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: uge_inverse: +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @uge_inverse(double %x, double %y) nounwind { %c = fcmp uge double %x, %y %d = select i1 %c, double %y, double %x @@ -227,6 +394,14 @@ define double @uge_inverse(double %x, double %y) nounwind { ; CHECK: ule_inverse: ; CHECK-NEXT: maxsd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: ule_inverse: +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ule_inverse: +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ule_inverse(double %x, double %y) nounwind { %c = fcmp ule double %x, %y %d = select i1 %c, double %y, double %x @@ -234,10 +409,15 @@ define double @ule_inverse(double %x, double %y) nounwind { } ; CHECK: x_ugt: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: maxsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: ret +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: x_ugt: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ugt: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ugt(double %x) nounwind { %c = fcmp ugt double %x, 0.000000e+00 %d = select i1 %c, double %x, double 0.000000e+00 @@ -245,10 +425,15 @@ define double @x_ugt(double %x) nounwind { } ; CHECK: x_ult: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: minsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: ret +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: x_ult: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ult: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ult(double %x) nounwind { %c = fcmp ult double %x, 0.000000e+00 %d = select i1 %c, double %x, double 0.000000e+00 @@ 
-256,9 +441,17 @@ define double @x_ult(double %x) nounwind { } ; CHECK: x_ugt_inverse: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: minsd %xmm1, %xmm0 -; CHECK-NEXT: ret +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: x_ugt_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ugt_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ugt_inverse(double %x) nounwind { %c = fcmp ugt double %x, 0.000000e+00 %d = select i1 %c, double 0.000000e+00, double %x @@ -266,9 +459,17 @@ define double @x_ugt_inverse(double %x) nounwind { } ; CHECK: x_ult_inverse: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: maxsd %xmm1, %xmm0 -; CHECK-NEXT: ret +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: x_ult_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ult_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ult_inverse(double %x) nounwind { %c = fcmp ult double %x, 0.000000e+00 %d = select i1 %c, double 0.000000e+00, double %x @@ -280,6 +481,14 @@ define double @x_ult_inverse(double %x) nounwind { ; CHECK-NEXT: maxsd %xmm0, %xmm1 ; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: x_uge: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_uge: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_uge(double %x) nounwind { %c = fcmp uge double %x, 0.000000e+00 %d = select i1 %c, double %x, double 0.000000e+00 @@ -291,6 +500,14 @@ define double @x_uge(double %x) nounwind { ; CHECK-NEXT: minsd %xmm0, %xmm1 ; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: x_ule: +; UNSAFE-NEXT: pxor %xmm1, 
%xmm1 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ule: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ule(double %x) nounwind { %c = fcmp ule double %x, 0.000000e+00 %d = select i1 %c, double %x, double 0.000000e+00 @@ -301,6 +518,16 @@ define double @x_ule(double %x) nounwind { ; CHECK-NEXT: pxor %xmm1, %xmm1 ; CHECK-NEXT: minsd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: x_uge_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_uge_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_uge_inverse(double %x) nounwind { %c = fcmp uge double %x, 0.000000e+00 %d = select i1 %c, double 0.000000e+00, double %x @@ -311,16 +538,301 @@ define double @x_uge_inverse(double %x) nounwind { ; CHECK-NEXT: pxor %xmm1, %xmm1 ; CHECK-NEXT: maxsd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: x_ule_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ule_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ule_inverse(double %x) nounwind { %c = fcmp ule double %x, 0.000000e+00 %d = select i1 %c, double 0.000000e+00, double %x ret double %d } +; CHECK: y_ogt: +; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_ogt: +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ogt: +; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_ogt(double %x) nounwind { + %c = fcmp ogt double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_olt: +; CHECK-NEXT: minsd {{[^,]*}}, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_olt: +; UNSAFE-NEXT: 
minsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_olt: +; FINITE-NEXT: minsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_olt(double %x) nounwind { + %c = fcmp olt double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_ogt_inverse: +; CHECK-NEXT: movsd {{[^,]*}}, %xmm1 +; CHECK-NEXT: minsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_ogt_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ogt_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_ogt_inverse(double %x) nounwind { + %c = fcmp ogt double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_olt_inverse: +; CHECK-NEXT: movsd {{[^,]*}}, %xmm1 +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_olt_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_olt_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_olt_inverse(double %x) nounwind { + %c = fcmp olt double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_oge: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: y_oge: +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_oge: +; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_oge(double %x) nounwind { + %c = fcmp oge double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_ole: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: y_ole: +; UNSAFE-NEXT: minsd {{[^,]*}}, 
%xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ole: +; FINITE-NEXT: minsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_ole(double %x) nounwind { + %c = fcmp ole double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_oge_inverse: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: y_oge_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_oge_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_oge_inverse(double %x) nounwind { + %c = fcmp oge double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_ole_inverse: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: y_ole_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ole_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_ole_inverse(double %x) nounwind { + %c = fcmp ole double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_ugt: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: y_ugt: +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ugt: +; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_ugt(double %x) nounwind { + %c = fcmp ugt double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_ult: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: y_ult: +; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ult: +; FINITE-NEXT: minsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_ult(double %x) nounwind { + %c = fcmp ult double %x, -0.000000e+00 + %d = select i1 
%c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_ugt_inverse: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: y_ugt_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ugt_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_ugt_inverse(double %x) nounwind { + %c = fcmp ugt double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_ult_inverse: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: y_ult_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ult_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_ult_inverse(double %x) nounwind { + %c = fcmp ult double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_uge: +; CHECK-NEXT: movsd {{[^,]*}}, %xmm1 +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_uge: +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_uge: +; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_uge(double %x) nounwind { + %c = fcmp uge double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_ule: +; CHECK-NEXT: movsd {{[^,]*}}, %xmm1 +; CHECK-NEXT: minsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_ule: +; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ule: +; FINITE-NEXT: minsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_ule(double %x) nounwind { + %c = fcmp ule double %x, -0.000000e+00 + %d = select i1 %c, double %x, 
double -0.000000e+00 + ret double %d +} + +; CHECK: y_uge_inverse: +; CHECK-NEXT: minsd {{[^,]*}}, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_uge_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_uge_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_uge_inverse(double %x) nounwind { + %c = fcmp uge double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_ule_inverse: +; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_ule_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ule_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_ule_inverse(double %x) nounwind { + %c = fcmp ule double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} ; Test a few more misc. cases. 
; CHECK: clampTo3k_a: ; CHECK: minsd +; UNSAFE: clampTo3k_a: +; UNSAFE: minsd +; FINITE: clampTo3k_a: +; FINITE: minsd define double @clampTo3k_a(double %x) nounwind readnone { entry: %0 = fcmp ogt double %x, 3.000000e+03 ; <i1> [#uses=1] @@ -330,6 +842,10 @@ entry: ; CHECK: clampTo3k_b: ; CHECK: minsd +; UNSAFE: clampTo3k_b: +; UNSAFE: minsd +; FINITE: clampTo3k_b: +; FINITE: minsd define double @clampTo3k_b(double %x) nounwind readnone { entry: %0 = fcmp uge double %x, 3.000000e+03 ; <i1> [#uses=1] @@ -339,6 +855,10 @@ entry: ; CHECK: clampTo3k_c: ; CHECK: maxsd +; UNSAFE: clampTo3k_c: +; UNSAFE: maxsd +; FINITE: clampTo3k_c: +; FINITE: maxsd define double @clampTo3k_c(double %x) nounwind readnone { entry: %0 = fcmp olt double %x, 3.000000e+03 ; <i1> [#uses=1] @@ -348,6 +868,10 @@ entry: ; CHECK: clampTo3k_d: ; CHECK: maxsd +; UNSAFE: clampTo3k_d: +; UNSAFE: maxsd +; FINITE: clampTo3k_d: +; FINITE: maxsd define double @clampTo3k_d(double %x) nounwind readnone { entry: %0 = fcmp ule double %x, 3.000000e+03 ; <i1> [#uses=1] @@ -357,6 +881,10 @@ entry: ; CHECK: clampTo3k_e: ; CHECK: maxsd +; UNSAFE: clampTo3k_e: +; UNSAFE: maxsd +; FINITE: clampTo3k_e: +; FINITE: maxsd define double @clampTo3k_e(double %x) nounwind readnone { entry: %0 = fcmp olt double %x, 3.000000e+03 ; <i1> [#uses=1] @@ -366,6 +894,10 @@ entry: ; CHECK: clampTo3k_f: ; CHECK: maxsd +; UNSAFE: clampTo3k_f: +; UNSAFE: maxsd +; FINITE: clampTo3k_f: +; FINITE: maxsd define double @clampTo3k_f(double %x) nounwind readnone { entry: %0 = fcmp ule double %x, 3.000000e+03 ; <i1> [#uses=1] @@ -375,6 +907,10 @@ entry: ; CHECK: clampTo3k_g: ; CHECK: minsd +; UNSAFE: clampTo3k_g: +; UNSAFE: minsd +; FINITE: clampTo3k_g: +; FINITE: minsd define double @clampTo3k_g(double %x) nounwind readnone { entry: %0 = fcmp ogt double %x, 3.000000e+03 ; <i1> [#uses=1] @@ -384,6 +920,10 @@ entry: ; CHECK: clampTo3k_h: ; CHECK: minsd +; UNSAFE: clampTo3k_h: +; UNSAFE: minsd +; FINITE: clampTo3k_h: +; FINITE: minsd define 
double @clampTo3k_h(double %x) nounwind readnone { entry: %0 = fcmp uge double %x, 3.000000e+03 ; <i1> [#uses=1] diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll index b2af7c947d..921161e4a1 100644 --- a/test/CodeGen/X86/sse3.ll +++ b/test/CodeGen/X86/sse3.ll @@ -144,10 +144,9 @@ define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind { store <4 x float> %tmp13, <4 x float>* %r ret void ; X64: t9: -; X64: movsd (%rsi), %xmm0 -; X64: movaps (%rdi), %xmm1 -; X64: movlhps %xmm0, %xmm1 -; X64: movaps %xmm1, (%rdi) +; X64: movaps (%rdi), %xmm0 +; X64: movhps (%rsi), %xmm0 +; X64: movaps %xmm0, (%rdi) ; X64: ret } diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll index cb65e9b50f..e971ef70db 100644 --- a/test/CodeGen/X86/stack-align.ll +++ b/test/CodeGen/X86/stack-align.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -relocation-model=static -mcpu=yonah | grep {andpd.*4(%esp), %xmm} +; RUN: llc < %s -relocation-model=static -realign-stack=1 -mcpu=yonah | FileCheck %s ; The double argument is at 4(esp) which is 16-byte aligned, allowing us to ; fold the load into the andpd. @@ -12,6 +12,7 @@ entry: %tmp = getelementptr { double, double }* %z, i32 0, i32 0 ; <double*> [#uses=1] %tmp1 = load double* %tmp, align 8 ; <double> [#uses=1] %tmp2 = tail call double @fabs( double %tmp1 ) ; <double> [#uses=1] + ; CHECK: andpd{{.*}}4(%esp), %xmm %tmp3 = load double* @G, align 16 ; <double> [#uses=1] %tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1] %tmp6 = fadd double %tmp4, %tmp2 ; <double> [#uses=1] @@ -19,4 +20,20 @@ entry: ret void } +define void @test2() alignstack(16) { +entry: + ; CHECK: andl{{.*}}$-16, %esp + ret void +} + +; Use a call to force a spill. 
+define <2 x double> @test3(<2 x double> %x, <2 x double> %y) alignstack(32) { +entry: + ; CHECK: andl{{.*}}$-32, %esp + call void @test2() + %A = mul <2 x double> %x, %y + ret <2 x double> %A +} + declare double @fabs(double) + diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll index 7d85818d46..42e7a394d8 100644 --- a/test/CodeGen/X86/stack-color-with-reg.ll +++ b/test/CodeGen/X86/stack-color-with-reg.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t -; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 14 +; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 8 type { [62 x %struct.Bitvec*] } ; type %0 type { i8* } ; type %1 diff --git a/test/CodeGen/X86/stdcall.ll b/test/CodeGen/X86/stdcall.ll new file mode 100644 index 0000000000..70204bcf47 --- /dev/null +++ b/test/CodeGen/X86/stdcall.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s | FileCheck %s +; PR5851 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-mingw32" + +%0 = type { void (...)* } + +@B = global %0 { void (...)* bitcast (void ()* @MyFunc to void (...)*) }, align 4 +; CHECK: _B: +; CHECK: .long _MyFunc@0 + +define internal x86_stdcallcc void @MyFunc() nounwind { +entry: + ret void +} diff --git a/test/CodeGen/X86/store_op_load_fold.ll b/test/CodeGen/X86/store_op_load_fold.ll index 66d0e47c6d..6e47eb397d 100644 --- a/test/CodeGen/X86/store_op_load_fold.ll +++ b/test/CodeGen/X86/store_op_load_fold.ll @@ -4,7 +4,7 @@ @X = internal global i16 0 ; <i16*> [#uses=2] -define void @foo() { +define void @foo() nounwind { %tmp.0 = load i16* @X ; <i16> [#uses=1] %tmp.3 = add i16 %tmp.0, 329 ; <i16> [#uses=1] store i16 %tmp.3, i16* @X diff --git 
a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll index 0ccfe470db..46e59e95e5 100644 --- a/test/CodeGen/X86/store_op_load_fold2.ll +++ b/test/CodeGen/X86/store_op_load_fold2.ll @@ -1,21 +1,12 @@ -; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ -; RUN: grep {and DWORD PTR} | count 2 +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s target datalayout = "e-p:32:32" %struct.Macroblock = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 } -define internal fastcc i32 @dct_chroma(i32 %uv, i32 %cr_cbp) { -entry: - br i1 true, label %cond_true2732.preheader, label %cond_true129 -cond_true129: ; preds = %entry - ret i32 0 +define internal fastcc i32 @dct_chroma(i32 %uv, i32 %cr_cbp) nounwind { cond_true2732.preheader: ; preds = %entry %tmp2666 = getelementptr %struct.Macroblock* null, i32 0, i32 13 ; <i64*> [#uses=2] %tmp2674 = trunc i32 0 to i8 ; <i8> [#uses=1] - br i1 true, label %cond_true2732.preheader.split.us, label %cond_true2732.preheader.split -cond_true2732.preheader.split.us: ; preds = %cond_true2732.preheader - br i1 true, label %cond_true2732.outer.us.us, label %cond_true2732.outer.us -cond_true2732.outer.us.us: ; preds = %cond_true2732.preheader.split.us %tmp2667.us.us = load i64* %tmp2666 ; <i64> [#uses=1] %tmp2670.us.us = load i64* null ; <i64> [#uses=1] %shift.upgrd.1 = zext i8 %tmp2674 to i64 ; <i64> [#uses=1] @@ -24,11 +15,10 @@ cond_true2732.outer.us.us: ; preds = %cond_true2732.preheader.split %tmp2676.us.us = and i64 %tmp2667.us.us, %tmp2675not.us.us ; <i64> [#uses=1] store i64 %tmp2676.us.us, i64* %tmp2666 ret i32 0 -cond_true2732.outer.us: ; preds = %cond_true2732.preheader.split.us - ret i32 0 -cond_true2732.preheader.split: ; preds = 
%cond_true2732.preheader - ret i32 0 -cond_next2752: ; No predecessors! - ret i32 0 + +; CHECK: and {{E..}}, DWORD PTR [360] +; CHECK: and DWORD PTR [356], {{E..}} +; CHECK: mov DWORD PTR [360], {{E..}} + } diff --git a/test/CodeGen/X86/tailcall2.ll b/test/CodeGen/X86/tailcall2.ll index 80bab619c1..90315fd2f2 100644 --- a/test/CodeGen/X86/tailcall2.ll +++ b/test/CodeGen/X86/tailcall2.ll @@ -195,3 +195,24 @@ bb2: } declare i32 @foo6(i32, i32, %struct.t* byval align 4) + +; rdar://r7717598 +%struct.ns = type { i32, i32 } +%struct.cp = type { float, float } + +define %struct.ns* @t13(%struct.cp* %yy) nounwind ssp { +; 32: t13: +; 32-NOT: jmp +; 32: call +; 32: ret + +; 64: t13: +; 64-NOT: jmp +; 64: call +; 64: ret +entry: + %0 = tail call fastcc %struct.ns* @foo7(%struct.cp* byval align 4 %yy, i8 signext 0) nounwind + ret %struct.ns* %0 +} + +declare fastcc %struct.ns* @foo7(%struct.cp* byval align 4, i8 signext) nounwind ssp diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll index bfab1aef90..6062084106 100644 --- a/test/CodeGen/X86/trunc-to-bool.ll +++ b/test/CodeGen/X86/trunc-to-bool.ll @@ -3,13 +3,14 @@ ; value and as the operand of a branch. 
; RUN: llc < %s -march=x86 | FileCheck %s -define i1 @test1(i32 %X) zeroext { +define i1 @test1(i32 %X) zeroext nounwind { %Y = trunc i32 %X to i1 ret i1 %Y } +; CHECK: test1: ; CHECK: andl $1, %eax -define i1 @test2(i32 %val, i32 %mask) { +define i1 @test2(i32 %val, i32 %mask) nounwind { entry: %shifted = ashr i32 %val, %mask %anded = and i32 %shifted, 1 @@ -20,9 +21,10 @@ ret_true: ret_false: ret i1 false } -; CHECK: testb $1, %al +; CHECK: test2: +; CHECK: btl %eax -define i32 @test3(i8* %ptr) { +define i32 @test3(i8* %ptr) nounwind { %val = load i8* %ptr %tmp = trunc i8 %val to i1 br i1 %tmp, label %cond_true, label %cond_false @@ -31,9 +33,10 @@ cond_true: cond_false: ret i32 42 } -; CHECK: testb $1, %al +; CHECK: test3: +; CHECK: testb $1, (%eax) -define i32 @test4(i8* %ptr) { +define i32 @test4(i8* %ptr) nounwind { %tmp = ptrtoint i8* %ptr to i1 br i1 %tmp, label %cond_true, label %cond_false cond_true: @@ -41,9 +44,10 @@ cond_true: cond_false: ret i32 42 } -; CHECK: testb $1, %al +; CHECK: test4: +; CHECK: testb $1, 4(%esp) -define i32 @test6(double %d) { +define i32 @test5(double %d) nounwind { %tmp = fptosi double %d to i1 br i1 %tmp, label %cond_true, label %cond_false cond_true: @@ -51,4 +55,5 @@ cond_true: cond_false: ret i32 42 } +; CHECK: test5: ; CHECK: testb $1 diff --git a/test/CodeGen/X86/twoaddr-coalesce.ll b/test/CodeGen/X86/twoaddr-coalesce.ll index d0e13f61f2..4c37225ce0 100644 --- a/test/CodeGen/X86/twoaddr-coalesce.ll +++ b/test/CodeGen/X86/twoaddr-coalesce.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | grep mov | count 5 +; RUN: llc < %s -march=x86 | grep mov | count 4 ; rdar://6523745 @"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] diff --git a/test/CodeGen/X86/use-add-flags.ll b/test/CodeGen/X86/use-add-flags.ll index 2dd2a4adac..c2f0c23fe1 100644 --- a/test/CodeGen/X86/use-add-flags.ll +++ b/test/CodeGen/X86/use-add-flags.ll @@ -5,13 +5,13 @@ ; Use the flags on the add. 
-; CHECK: add_zf: +; CHECK: test1: ; CHECK: addl (%rdi), %esi ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: cmovnsl %ecx, %eax ; CHECK-NEXT: ret -define i32 @add_zf(i32* %x, i32 %y, i32 %a, i32 %b) nounwind { +define i32 @test1(i32* %x, i32 %y, i32 %a, i32 %b) nounwind { %tmp2 = load i32* %x, align 4 ; <i32> [#uses=1] %tmp4 = add i32 %tmp2, %y ; <i32> [#uses=1] %tmp5 = icmp slt i32 %tmp4, 0 ; <i1> [#uses=1] @@ -24,10 +24,10 @@ declare void @foo(i32) ; Don't use the flags result of the and here, since the and has no ; other use. A simple test is better. -; CHECK: bar: +; CHECK: test2: ; CHECK: testb $16, %dil -define void @bar(i32 %x) nounwind { +define void @test2(i32 %x) nounwind { %y = and i32 %x, 16 %t = icmp eq i32 %y, 0 br i1 %t, label %true, label %false @@ -40,11 +40,11 @@ false: ; Do use the flags result of the and here, since the and has another use. -; CHECK: qux: +; CHECK: test3: ; CHECK: andl $16, %edi ; CHECK-NEXT: jne -define void @qux(i32 %x) nounwind { +define void @test3(i32 %x) nounwind { %y = and i32 %x, 16 %t = icmp eq i32 %y, 0 br i1 %t, label %true, label %false diff --git a/test/CodeGen/X86/vec_cast.ll b/test/CodeGen/X86/vec_cast.ll index 1f899b3c20..6f18d13cc9 100644 --- a/test/CodeGen/X86/vec_cast.ll +++ b/test/CodeGen/X86/vec_cast.ll @@ -31,11 +31,10 @@ define <1 x i32> @f(<1 x i16> %a) nounwind { ret <1 x i32> %c } -; TODO: Legalize doesn't yet handle this. 
-;define <8 x i16> @g(<8 x i32> %a) nounwind { -; %c = trunc <8 x i32> %a to <8 x i16> -; ret <8 x i16> %c -;} +define <8 x i16> @g(<8 x i32> %a) nounwind { + %c = trunc <8 x i32> %a to <8 x i16> + ret <8 x i16> %c +} define <3 x i16> @h(<3 x i32> %a) nounwind { %c = trunc <3 x i32> %a to <3 x i16> @@ -46,3 +45,12 @@ define <1 x i16> @i(<1 x i32> %a) nounwind { %c = trunc <1 x i32> %a to <1 x i16> ret <1 x i16> %c } + +; PR6438 +define void @__OpenCL_math_kernel4_kernel() nounwind { + %tmp12.i = and <4 x i32> zeroinitializer, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040> ; <<4 x i32>> [#uses=1] + %cmp13.i = icmp eq <4 x i32> %tmp12.i, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040> ; <<4 x i1>> [#uses=2] + %cmp.ext14.i = sext <4 x i1> %cmp13.i to <4 x i32> ; <<4 x i32>> [#uses=0] + %tmp2110.i = and <4 x i1> %cmp13.i, zeroinitializer ; <<4 x i1>> [#uses=0] + ret void +} diff --git a/test/CodeGen/X86/vec_insert.ll b/test/CodeGen/X86/vec_insert.ll index a7274a9000..4e5d445ff6 100644 --- a/test/CodeGen/X86/vec_insert.ll +++ b/test/CodeGen/X86/vec_insert.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss | count 1 ; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | not grep pinsrw -define void @test(<4 x float>* %F, i32 %I) { +define void @test(<4 x float>* %F, i32 %I) nounwind { %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=1] %f = sitofp i32 %I to float ; <float> [#uses=1] %tmp1 = insertelement <4 x float> %tmp, float %f, i32 0 ; <<4 x float>> [#uses=2] @@ -10,7 +10,7 @@ define void @test(<4 x float>* %F, i32 %I) { ret void } -define void @test2(<4 x float>* %F, i32 %I, float %g) { +define void @test2(<4 x float>* %F, i32 %I, float %g) nounwind { %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=1] %f = sitofp i32 %I to float ; <float> [#uses=1] %tmp1 = insertelement <4 x float> %tmp, float %f, i32 2 ; <<4 x float>> [#uses=1] diff --git a/test/CodeGen/X86/vec_shuffle-36.ll 
b/test/CodeGen/X86/vec_shuffle-36.ll index 8a93a7eeee..1ea37c881e 100644 --- a/test/CodeGen/X86/vec_shuffle-36.ll +++ b/test/CodeGen/X86/vec_shuffle-36.ll @@ -1,9 +1,16 @@ -; RUN: llc < %s -march=x86 -mattr=sse41 -o %t -; RUN: grep pshufb %t | count 1 - +; RUN: llc < %s -march=x86-64 -mattr=sse41 | FileCheck %s define <8 x i16> @shuf6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +; CHECK: pshufb +; CHECK-NOT: pshufb +; CHECK: ret entry: - %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 3, i32 2, i32 0, i32 2, i32 1, i32 5, i32 6 , i32 undef > - ret <8 x i16> %tmp9 + %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 3, i32 2, i32 0, i32 2, i32 1, i32 5, i32 6 , i32 undef > + ret <8 x i16> %tmp9 } + +define <8 x i16> @shuf7(<8 x i16> %t0) { +; CHECK: pshufd + %tmp10 = shufflevector <8 x i16> %t0, <8 x i16> undef, <8 x i32> < i32 undef, i32 2, i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef > + ret <8 x i16> %tmp10 +}
\ No newline at end of file diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll index b1613fb3a3..c8b2927b71 100644 --- a/test/CodeGen/X86/vec_ss_load_fold.ll +++ b/test/CodeGen/X86/vec_ss_load_fold.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 -o %t -; RUN: grep minss %t | grep CPI | count 2 -; RUN: grep CPI %t | not grep movss +; RUN: llc < %s -march=x86 -mattr=+sse,+sse2,+sse41 | FileCheck %s target datalayout = "e-p:32:32" target triple = "i686-apple-darwin8.7.2" @@ -17,6 +15,10 @@ define i16 @test1(float %f) nounwind { %tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1] %tmp69 = trunc i32 %tmp.upgrd.1 to i16 ; <i16> [#uses=1] ret i16 %tmp69 +; CHECK: test1: +; CHECK: subss LCPI1_ +; CHECK: mulss LCPI1_ +; CHECK: minss LCPI1_ } define i16 @test2(float %f) nounwind { @@ -28,6 +30,10 @@ define i16 @test2(float %f) nounwind { %tmp = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1] %tmp69 = trunc i32 %tmp to i16 ; <i16> [#uses=1] ret i16 %tmp69 +; CHECK: test2: +; CHECK: addss LCPI2_ +; CHECK: mulss LCPI2_ +; CHECK: minss LCPI2_ } declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) @@ -39,3 +45,28 @@ declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) + + +declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) +declare <4 x float> @f() + +define <4 x float> @test3(<4 x float> %A, float *%b, i32 %C) nounwind { + %a = load float *%b + %B = insertelement <4 x float> undef, float %a, i32 0 + %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %A, <4 x float> %B, i32 4) + ret <4 x float> %X +; CHECK: test3: +; CHECK: roundss $4, (%eax), %xmm0 +} + +define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind { + %a = load float *%b + %B = insertelement <4 x float> 
undef, float %a, i32 0 + %q = call <4 x float> @f() + %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %q, <4 x float> %B, i32 4) + ret <4 x float> %X +; CHECK: test4: +; CHECK: movss (%eax), %xmm +; CHECK: call +; CHECK: roundss $4, %xmm{{.*}}, %xmm0 +} diff --git a/test/CodeGen/X86/xor-icmp.ll b/test/CodeGen/X86/xor-icmp.ll index a6bdb13ec6..2d75c5d762 100644 --- a/test/CodeGen/X86/xor-icmp.ll +++ b/test/CodeGen/X86/xor-icmp.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 ; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64 +; rdar://7367229 define i32 @t(i32 %a, i32 %b) nounwind ssp { entry: @@ -34,3 +35,33 @@ bb1: ; preds = %entry declare i32 @foo(...) declare i32 @bar(...) + +define i32 @t2(i32 %x, i32 %y) nounwind ssp { +; X32: t2: +; X32: cmpl +; X32: sete +; X32: cmpl +; X32: sete +; X32-NOT: xor +; X32: je + +; X64: t2: +; X64: testl +; X64: sete +; X64: testl +; X64: sete +; X64-NOT: xor +; X64: je +entry: + %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1] + %1 = icmp eq i32 %y, 0 ; <i1> [#uses=1] + %2 = xor i1 %1, %0 ; <i1> [#uses=1] + br i1 %2, label %bb, label %return + +bb: ; preds = %entry + %3 = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=0] + ret i32 undef + +return: ; preds = %entry + ret i32 undef +} diff --git a/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll b/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll new file mode 100644 index 0000000000..6ad9a73899 --- /dev/null +++ b/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=xcore +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "xcore-xmos-elf" + +%0 = type { i32 } +%struct.dwarf_fde = type <{ i32, i32, [0 x i8] }> +%struct.object = type { i8*, i8*, i8*, %union.anon, %0, %struct.object* } +%union.anon = type { %struct.dwarf_fde* } + +define %struct.dwarf_fde* @search_object(%struct.object* %ob, i8* %pc) { +entry: + br 
i1 undef, label %bb3.i15.i.i, label %bb2 + +bb3.i15.i.i: ; preds = %bb3.i15.i.i, %entry + %indvar.i.i.i = phi i32 [ %indvar.next.i.i.i, %bb3.i15.i.i ], [ 0, %entry ] ; <i32> [#uses=2] + %tmp137 = sub i32 0, %indvar.i.i.i ; <i32> [#uses=1] + %scevgep13.i.i.i = getelementptr i32* undef, i32 %tmp137 ; <i32*> [#uses=2] + %scevgep1314.i.i.i = bitcast i32* %scevgep13.i.i.i to %struct.dwarf_fde** ; <%struct.dwarf_fde**> [#uses=1] + %0 = load %struct.dwarf_fde** %scevgep1314.i.i.i, align 4 ; <%struct.dwarf_fde*> [#uses=0] + store i32 undef, i32* %scevgep13.i.i.i + %indvar.next.i.i.i = add i32 %indvar.i.i.i, 1 ; <i32> [#uses=1] + br label %bb3.i15.i.i + +bb2: ; preds = %entry + ret %struct.dwarf_fde* undef +} diff --git a/test/CodeGen/XCore/switch.ll b/test/CodeGen/XCore/switch.ll new file mode 100644 index 0000000000..9cc27f2ffa --- /dev/null +++ b/test/CodeGen/XCore/switch.ll @@ -0,0 +1,24 @@ +; RUN: llc -march=xcore < %s | FileCheck %s + +define i32 @switch(i32 %i) { +entry: + switch i32 %i, label %default [ + i32 0, label %bb0 + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + ] +; CHECK-NOT: shl +; CHECK: bru +; CHECK: .jmptable +bb0: + ret i32 0 +bb1: + ret i32 1 +bb2: + ret i32 2 +bb3: + ret i32 3 +default: + ret i32 4 +} diff --git a/test/CodeGen/XCore/switch_long.ll b/test/CodeGen/XCore/switch_long.ll new file mode 100644 index 0000000000..30c9e3db73 --- /dev/null +++ b/test/CodeGen/XCore/switch_long.ll @@ -0,0 +1,132 @@ +; RUN: llc -march=xcore < %s | FileCheck %s + +define i32 @switch(i32 %i) { +entry: + switch i32 %i, label %default [ + i32 0, label %bb0 + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + i32 5, label %bb5 + i32 6, label %bb6 + i32 7, label %bb7 + i32 8, label %bb8 + i32 9, label %bb9 + i32 10, label %bb10 + i32 11, label %bb11 + i32 12, label %bb12 + i32 13, label %bb13 + i32 14, label %bb14 + i32 15, label %bb15 + i32 16, label %bb16 + i32 17, label %bb17 + i32 18, label %bb18 + i32 19, label %bb19 + 
i32 20, label %bb20 + i32 21, label %bb21 + i32 22, label %bb22 + i32 23, label %bb23 + i32 24, label %bb24 + i32 25, label %bb25 + i32 26, label %bb26 + i32 27, label %bb27 + i32 28, label %bb28 + i32 29, label %bb29 + i32 30, label %bb30 + i32 31, label %bb31 + i32 32, label %bb32 + i32 33, label %bb33 + i32 34, label %bb34 + i32 35, label %bb35 + i32 36, label %bb36 + i32 37, label %bb37 + i32 38, label %bb38 + i32 39, label %bb39 + ] +; CHECK: shl +; CHECK: bru +; CHECK: .jmptable +bb0: + ret i32 0 +bb1: + ret i32 1 +bb2: + ret i32 2 +bb3: + ret i32 3 +bb4: + ret i32 4 +bb5: + ret i32 5 +bb6: + ret i32 6 +bb7: + ret i32 7 +bb8: + ret i32 8 +bb9: + ret i32 9 +bb10: + ret i32 0 +bb11: + ret i32 1 +bb12: + ret i32 2 +bb13: + ret i32 3 +bb14: + ret i32 4 +bb15: + ret i32 5 +bb16: + ret i32 6 +bb17: + ret i32 7 +bb18: + ret i32 8 +bb19: + ret i32 9 +bb20: + ret i32 0 +bb21: + ret i32 1 +bb22: + ret i32 2 +bb23: + ret i32 3 +bb24: + ret i32 4 +bb25: + ret i32 5 +bb26: + ret i32 6 +bb27: + ret i32 7 +bb28: + ret i32 8 +bb29: + ret i32 9 +bb30: + ret i32 0 +bb31: + ret i32 1 +bb32: + ret i32 2 +bb33: + ret i32 3 +bb34: + ret i32 4 +bb35: + ret i32 5 +bb36: + ret i32 6 +bb37: + ret i32 7 +bb38: + ret i32 8 +bb39: + ret i32 9 +default: + ret i32 0 +} |