diff options
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- | test/CodeGen/ARM/arm-negative-stride.ll | 26 | ||||
-rw-r--r-- | test/CodeGen/ARM/lsr-code-insertion.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/ARM/remat.ll | 3 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/lsr-deficiency.ll | 18 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/thumb2-ifcvt1.ll | 12 | ||||
-rw-r--r-- | test/CodeGen/X86/2006-05-11-InstrSched.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/X86/2007-08-13-SpillerReuse.ll | 102 | ||||
-rw-r--r-- | test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/2009-09-10-SpillComments.ll | 9 | ||||
-rw-r--r-- | test/CodeGen/X86/full-lsr.ll | 12 | ||||
-rw-r--r-- | test/CodeGen/X86/iv-users-in-other-loops.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/X86/loop-strength-reduce4.ll | 18 | ||||
-rw-r--r-- | test/CodeGen/X86/loop-strength-reduce8.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/X86/lsr-reuse.ll | 159 | ||||
-rw-r--r-- | test/CodeGen/X86/masked-iv-safe.ll | 7 | ||||
-rw-r--r-- | test/CodeGen/X86/pr3495-2.ll | 1 | ||||
-rw-r--r-- | test/CodeGen/X86/remat-mov-0.ll | 26 | ||||
-rw-r--r-- | test/CodeGen/X86/remat-mov-1.ll | 40 | ||||
-rw-r--r-- | test/CodeGen/X86/subreg-to-reg-5.ll | 35 |
19 files changed, 279 insertions, 215 deletions
diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll index 72ec8efcc4..52ab8717c1 100644 --- a/test/CodeGen/ARM/arm-negative-stride.ll +++ b/test/CodeGen/ARM/arm-negative-stride.ll @@ -1,7 +1,32 @@ ; RUN: llc < %s -march=arm | FileCheck %s +; This loop is rewritten with an indvar which counts down, which +; frees up a register from holding the trip count. + define void @test(i32* %P, i32 %A, i32 %i) nounwind { entry: +; CHECK: str r1, [{{r.*}}, +{{r.*}}, lsl #2] + icmp eq i32 %i, 0 ; <i1>:0 [#uses=1] + br i1 %0, label %return, label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] + %i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1] + %tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1] + store i32 %A, i32* %tmp2 + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1] + br i1 %1, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +; This loop has a non-address use of the count-up indvar, so +; it'll remain. Now the original store uses a negative-stride address. + +define void @test_with_forced_iv(i32* %P, i32 %A, i32 %i) nounwind { +entry: ; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2] icmp eq i32 %i, 0 ; <i1>:0 [#uses=1] br i1 %0, label %return, label %bb @@ -11,6 +36,7 @@ bb: ; preds = %bb, %entry %i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1] %tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1] store i32 %A, i32* %tmp2 + store i32 %indvar, i32* null %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1] br i1 %1, label %return, label %bb diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll index 507ec2c7bd..1bbb96deee 100644 --- a/test/CodeGen/ARM/lsr-code-insertion.ll +++ b/test/CodeGen/ARM/lsr-code-insertion.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -stats |& grep {40.*Number of machine instrs printed} -; RUN: llc < %s -stats |& grep {.*Number of re-materialization} +; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed} +; RUN: llc < %s -stats |& not grep {.*Number of re-materialization} ; This test really wants to check that the resultant "cond_true" block only ; has a single store in it, and that cond_true55 only has code to materialize ; the constant and do a store. We do *not* want something like this: diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll index 9565c8bca6..9072bcb762 100644 --- a/test/CodeGen/ARM/remat.ll +++ b/test/CodeGen/ARM/remat.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin -; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 3 +; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | not grep "Number of re-materialization" %struct.CONTENTBOX = type { i32, i32, i32, i32, i32 } %struct.LOCBOX = type { i32, i32, i32, i32 } diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll index 7b1b57a786..ac2cd34e4b 100644 --- a/test/CodeGen/Thumb2/lsr-deficiency.ll +++ b/test/CodeGen/Thumb2/lsr-deficiency.ll @@ -1,25 +1,29 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic | FileCheck %s ; rdar://7387640 -; FIXME: We still need to rewrite array reference iv of stride -4 with loop -; count iv of stride -1. +; This now reduces to a single induction variable. + +; TODO: It still gets a GPR shuffle at the end of the loop +; This is because something in instruction selection has decided +; that comparing the pre-incremented value with zero is better +; than comparing the post-incremented value with -4. @G = external global i32 ; <i32*> [#uses=2] @array = external global i32* ; <i32**> [#uses=1] define arm_apcscc void @t() nounwind optsize { ; CHECK: t: -; CHECK: mov.w r2, #4000 -; CHECK: movw r3, #1001 +; CHECK: mov.w r2, #1000 entry: %.pre = load i32* @G, align 4 ; <i32> [#uses=1] br label %bb bb: ; preds = %bb, %entry ; CHECK: LBB1_1: -; CHECK: subs r3, #1 -; CHECK: cmp r3, #0 -; CHECK: sub.w r2, r2, #4 +; CHECK: cmp r2, #0 +; CHECK: sub.w r9, r2, #1 +; CHECK: mov r2, r9 + %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1] %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] %tmp5 = sub i32 1000, %indvar ; <i32> [#uses=1] diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll index 71199abc57..1d267565e0 100644 --- a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll +++ b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s -define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { +define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK: t1: ; CHECK: it ne ; CHECK: cmpne @@ -20,12 +20,12 @@ cond_next: } ; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt. -define i32 @t2(i32 %a, i32 %b) { +define i32 @t2(i32 %a, i32 %b) nounwind { entry: ; CHECK: t2: -; CHECK: ite le -; CHECK: suble +; CHECK: ite gt ; CHECK: subgt +; CHECK: suble %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1] br i1 %tmp1434, label %bb17, label %bb.outer @@ -60,14 +60,14 @@ bb17: ; preds = %cond_false, %cond_true, %entry @x = external global i32* ; <i32**> [#uses=1] -define void @foo(i32 %a) { +define void @foo(i32 %a) nounwind { entry: %tmp = load i32** @x ; <i32*> [#uses=1] store i32 %a, i32* %tmp ret void } -define void @t3(i32 %a, i32 %b) { +define void @t3(i32 %a, i32 %b) nounwind { entry: ; CHECK: t3: ; CHECK: it lt diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll index bdbe713a29..56d6aa960e 100644 --- a/test/CodeGen/X86/2006-05-11-InstrSched.ll +++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\ -; RUN: grep {asm-printer} | grep 31 +; RUN: grep {asm-printer} | grep 34 target datalayout = "e-p:32:32" define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind { @@ -40,7 +40,7 @@ cond_true: ; preds = %cond_true, %entry %tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1] store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7 %tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1] - %tmp.upgrd.8 = icmp slt i32 %tmp147, %M ; <i1> [#uses=1] + %tmp.upgrd.8 = icmp ne i32 %tmp147, %M ; <i1> [#uses=1] %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] br i1 %tmp.upgrd.8, label %cond_true, label %return diff --git a/test/CodeGen/X86/2007-08-13-SpillerReuse.ll b/test/CodeGen/X86/2007-08-13-SpillerReuse.ll deleted file mode 100644 index d6ea5109d1..0000000000 --- a/test/CodeGen/X86/2007-08-13-SpillerReuse.ll +++ /dev/null @@ -1,102 +0,0 @@ -; RUN: llc < %s -mtriple=i686-apple-darwin | grep "48(%esp)" | count 5 - - %struct..0anon = type { i32 } - %struct.rtvec_def = type { i32, [1 x %struct..0anon] } - %struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] } -@rtx_format = external global [116 x i8*] ; <[116 x i8*]*> [#uses=1] -@rtx_length = external global [117 x i32] ; <[117 x i32]*> [#uses=1] - -declare %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def*, i32) - -define %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) { -entry: - %tmp2 = icmp eq %struct.rtx_def* %x, null ; <i1> [#uses=1] - br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next - -cond_next: ; preds = %entry - %tmp6 = getelementptr %struct.rtx_def* %x, i32 0, i32 0 ; <i16*> [#uses=1] - %tmp7 = load i16* %tmp6 ; <i16> [#uses=2] - %tmp78 = zext i16 %tmp7 to i32 ; <i32> [#uses=2] - %tmp10 = icmp eq i16 %tmp7, 54 ; <i1> [#uses=1] - br i1 %tmp10, label %cond_true13, label %cond_next32 - -cond_true13: ; preds = %cond_next - %tmp15 = getelementptr %struct.rtx_def* %x, i32 0, i32 3 ; <[1 x %struct..0anon]*> [#uses=1] - %tmp1718 = bitcast [1 x %struct..0anon]* %tmp15 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1] - %tmp19 = load %struct.rtx_def** %tmp1718 ; <%struct.rtx_def*> [#uses=1] - %tmp20 = getelementptr %struct.rtx_def* %tmp19, i32 0, i32 0 ; <i16*> [#uses=1] - %tmp21 = load i16* %tmp20 ; <i16> [#uses=1] - %tmp22 = icmp eq i16 %tmp21, 57 ; <i1> [#uses=1] - br i1 %tmp22, label %cond_true25, label %cond_next32 - -cond_true25: ; preds = %cond_true13 - %tmp29 = tail call %struct.rtx_def* @fixup_memory_subreg( %struct.rtx_def* %x, %struct.rtx_def* %insn, i32 1 ) ; <%struct.rtx_def*> [#uses=1] - ret %struct.rtx_def* %tmp29 - -cond_next32: ; preds = %cond_true13, %cond_next - %tmp34 = getelementptr [116 x i8*]* @rtx_format, i32 0, i32 %tmp78 ; <i8**> [#uses=1] - %tmp35 = load i8** %tmp34, align 4 ; <i8*> [#uses=1] - %tmp37 = getelementptr [117 x i32]* @rtx_length, i32 0, i32 %tmp78 ; <i32*> [#uses=1] - %tmp38 = load i32* %tmp37, align 4 ; <i32> [#uses=1] - %i.011 = add i32 %tmp38, -1 ; <i32> [#uses=2] - %tmp12513 = icmp sgt i32 %i.011, -1 ; <i1> [#uses=1] - br i1 %tmp12513, label %bb, label %UnifiedReturnBlock - -bb: ; preds = %bb123, %cond_next32 - %indvar = phi i32 [ %indvar.next26, %bb123 ], [ 0, %cond_next32 ] ; <i32> [#uses=2] - %i.01.0 = sub i32 %i.011, %indvar ; <i32> [#uses=5] - %tmp42 = getelementptr i8* %tmp35, i32 %i.01.0 ; <i8*> [#uses=2] - %tmp43 = load i8* %tmp42 ; <i8> [#uses=1] - switch i8 %tmp43, label %bb123 [ - i8 101, label %cond_true47 - i8 69, label %bb105.preheader - ] - -cond_true47: ; preds = %bb - %tmp52 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1] - %tmp5354 = bitcast %struct..0anon* %tmp52 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1] - %tmp55 = load %struct.rtx_def** %tmp5354 ; <%struct.rtx_def*> [#uses=1] - %tmp58 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1] - %tmp62 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0, i32 0 ; <i32*> [#uses=1] - %tmp58.c = ptrtoint %struct.rtx_def* %tmp58 to i32 ; <i32> [#uses=1] - store i32 %tmp58.c, i32* %tmp62 - %tmp6816 = load i8* %tmp42 ; <i8> [#uses=1] - %tmp6917 = icmp eq i8 %tmp6816, 69 ; <i1> [#uses=1] - br i1 %tmp6917, label %bb105.preheader, label %bb123 - -bb105.preheader: ; preds = %cond_true47, %bb - %tmp11020 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1] - %tmp11111221 = bitcast %struct..0anon* %tmp11020 to %struct.rtvec_def** ; <%struct.rtvec_def**> [#uses=3] - %tmp11322 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1] - %tmp11423 = getelementptr %struct.rtvec_def* %tmp11322, i32 0, i32 0 ; <i32*> [#uses=1] - %tmp11524 = load i32* %tmp11423 ; <i32> [#uses=1] - %tmp11625 = icmp eq i32 %tmp11524, 0 ; <i1> [#uses=1] - br i1 %tmp11625, label %bb123, label %bb73 - -bb73: ; preds = %bb73, %bb105.preheader - %j.019 = phi i32 [ %tmp104, %bb73 ], [ 0, %bb105.preheader ] ; <i32> [#uses=3] - %tmp81 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=2] - %tmp92 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019 ; <%struct..0anon*> [#uses=1] - %tmp9394 = bitcast %struct..0anon* %tmp92 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1] - %tmp95 = load %struct.rtx_def** %tmp9394 ; <%struct.rtx_def*> [#uses=1] - %tmp98 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1] - %tmp101 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019, i32 0 ; <i32*> [#uses=1] - %tmp98.c = ptrtoint %struct.rtx_def* %tmp98 to i32 ; <i32> [#uses=1] - store i32 %tmp98.c, i32* %tmp101 - %tmp104 = add i32 %j.019, 1 ; <i32> [#uses=2] - %tmp113 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1] - %tmp114 = getelementptr %struct.rtvec_def* %tmp113, i32 0, i32 0 ; <i32*> [#uses=1] - %tmp115 = load i32* %tmp114 ; <i32> [#uses=1] - %tmp116 = icmp ult i32 %tmp104, %tmp115 ; <i1> [#uses=1] - br i1 %tmp116, label %bb73, label %bb123 - -bb123: ; preds = %bb73, %bb105.preheader, %cond_true47, %bb - %i.0 = add i32 %i.01.0, -1 ; <i32> [#uses=1] - %tmp125 = icmp sgt i32 %i.0, -1 ; <i1> [#uses=1] - %indvar.next26 = add i32 %indvar, 1 ; <i32> [#uses=1] - br i1 %tmp125, label %bb, label %UnifiedReturnBlock - -UnifiedReturnBlock: ; preds = %bb123, %cond_next32, %entry - %UnifiedRetVal = phi %struct.rtx_def* [ null, %entry ], [ %x, %cond_next32 ], [ %x, %bb123 ] ; <%struct.rtx_def*> [#uses=1] - ret %struct.rtx_def* %UnifiedRetVal -} diff --git a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll index 721d4c945b..8e315f4d80 100644 --- a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll +++ b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll @@ -35,7 +35,7 @@ cond_next36.i: ; preds = %cond_next.i bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i ; CHECK: %bb.i28.i ; CHECK: addl $2 -; CHECK: addl $2 +; CHECK: addl $-2 %j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2] %din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1] %tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2] diff --git a/test/CodeGen/X86/2009-09-10-SpillComments.ll b/test/CodeGen/X86/2009-09-10-SpillComments.ll index 1dd9990e71..f9ca861c55 100644 --- a/test/CodeGen/X86/2009-09-10-SpillComments.ll +++ b/test/CodeGen/X86/2009-09-10-SpillComments.ll @@ -1,5 +1,11 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux | FileCheck %s +; This test shouldn't require spills. + +; CHECK: subq $8, %rsp +; CHECK-NOT: $rsp +; CHECK: addq $8, %rsp + %struct..0anon = type { i32 } %struct.rtvec_def = type { i32, [1 x %struct..0anon] } %struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] } @@ -10,9 +16,6 @@ declare %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def* define %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) { entry: -; CHECK: Spill -; CHECK: Folded Spill -; CHECK: Reload %tmp2 = icmp eq %struct.rtx_def* %x, null ; <i1> [#uses=1] br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll index 68575bc401..3bd58b65be 100644 --- a/test/CodeGen/X86/full-lsr.ll +++ b/test/CodeGen/X86/full-lsr.ll @@ -1,6 +1,12 @@ -; RUN: llc < %s -march=x86 -enable-full-lsr >%t -; RUN: grep {addl \\\$4,} %t | count 3 -; RUN: not grep {,%} %t +; RUN: llc < %s -march=x86 >%t + +; TODO: Enhance full lsr mode to get this: +; RUNX: grep {addl \\\$4,} %t | count 3 +; RUNX: not grep {,%} %t + +; For now, it should find this, which is still pretty good: +; RUN: not grep {addl \\\$4,} %t +; RUN: grep {,%} %t | count 6 define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind { entry: diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll index c695c29e06..0410bc0d9a 100644 --- a/test/CodeGen/X86/iv-users-in-other-loops.ll +++ b/test/CodeGen/X86/iv-users-in-other-loops.ll @@ -1,11 +1,11 @@ ; RUN: llc < %s -march=x86-64 -o %t -; RUN: grep inc %t | count 1 +; RUN: not grep inc %t ; RUN: grep dec %t | count 2 -; RUN: grep addq %t | count 13 +; RUN: grep addq %t | count 10 ; RUN: not grep addb %t ; RUN: grep leaq %t | count 9 -; RUN: grep leal %t | count 3 -; RUN: grep movq %t | count 5 +; RUN: grep leal %t | count 2 +; RUN: grep movq %t | count 10 ; IV users in each of the loops from other loops shouldn't cause LSR ; to insert new induction variables. Previously it would create a diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll index 07e46eca75..6c0eb8c0df 100644 --- a/test/CodeGen/X86/loop-strength-reduce4.ll +++ b/test/CodeGen/X86/loop-strength-reduce4.ll @@ -1,5 +1,19 @@ -; RUN: llc < %s -march=x86 | grep cmp | grep 64 -; RUN: llc < %s -march=x86 | not grep inc +; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC +; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC + +; By starting the IV at -64 instead of 0, a cmp is eliminated, +; as the flags from the add can be used directly. + +; STATIC: movl $-64, %ecx + +; STATIC: movl %eax, _state+76(%ecx) +; STATIC: addl $16, %ecx +; STATIC: jne + +; In PIC mode the symbol can't be folded, so the change-compare-stride +; trick applies. + +; PIC: cmpl $64 @state = external global [0 x i32] ; <[0 x i32]*> [#uses=4] @S = external global [0 x i32] ; <[0 x i32]*> [#uses=4] diff --git a/test/CodeGen/X86/loop-strength-reduce8.ll b/test/CodeGen/X86/loop-strength-reduce8.ll index e14cd8a99e..6b2247d1d6 100644 --- a/test/CodeGen/X86/loop-strength-reduce8.ll +++ b/test/CodeGen/X86/loop-strength-reduce8.ll @@ -1,4 +1,10 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin | grep leal | not grep 16 +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s + +; CHECK: leal 16(%eax), %edx +; CHECK: align +; CHECK: addl $4, %edx +; CHECK: decl %ecx +; CHECK: jne LBB1_2 %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 } %struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] } diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll new file mode 100644 index 0000000000..a1919bab38 --- /dev/null +++ b/test/CodeGen/X86/lsr-reuse.ll @@ -0,0 +1,159 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +target datalayout = "e-p:64:64:64" +target triple = "x86_64-unknown-unknown" + +; Full strength reduction reduces register pressure from 5 to 4 here. + +; CHECK: full_me: +; CHECK: movsd (%rsi), %xmm0 +; CHECK: mulsd (%rdx), %xmm0 +; CHECK: movsd %xmm0, (%rdi) +; CHECK: addq $8, %rsi +; CHECK: addq $8, %rdx +; CHECK: addq $8, %rdi +; CHECK: decq %rcx +; CHECK: jne + +define void @full_me(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; In this test, the counting IV exit value is used, so full strength reduction +; would not reduce register pressure. IndVarSimplify ought to simplify such +; cases away, but it's useful here to verify that LSR's register pressure +; heuristics are working as expected. + +; CHECK: count_me_0: +; CHECK: movsd (%rsi,%rax,8), %xmm0 +; CHECK: mulsd (%rdx,%rax,8), %xmm0 +; CHECK: movsd %xmm0, (%rdi,%rax,8) +; CHECK: incq %rax +; CHECK: cmpq %rax, %rcx +; CHECK: jne + +define i64 @count_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + %q = phi i64 [ 0, %entry ], [ %i.next, %loop ] + ret i64 %q +} + +; In this test, the trip count value is used, so full strength reduction +; would not reduce register pressure. +; (though it would reduce register pressure inside the loop...) + +; CHECK: count_me_1: +; CHECK: movsd (%rsi,%rax,8), %xmm0 +; CHECK: mulsd (%rdx,%rax,8), %xmm0 +; CHECK: movsd %xmm0, (%rdi,%rax,8) +; CHECK: incq %rax +; CHECK: cmpq %rax, %rcx +; CHECK: jne + +define i64 @count_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + %q = phi i64 [ 0, %entry ], [ %n, %loop ] + ret i64 %q +} + +; This should be fully strength-reduced to reduce register pressure, however +; the current heuristics get distracted by all the reuse with the stride-1 +; induction variable first. + +; But even so, be clever and start the stride-1 variable at a non-zero value +; to eliminate an in-loop immediate value. + +; CHECK: count_me_2: +; CHECK: movl $5, %eax +; CHECK: align +; CHECK: BB4_1: +; CHECK: movsd (%rdi,%rax,8), %xmm0 +; CHECK: addsd (%rsi,%rax,8), %xmm0 +; CHECK: movsd %xmm0, (%rdx,%rax,8) +; CHECK: movsd 40(%rdi,%rax,8), %xmm0 +; CHECK: addsd 40(%rsi,%rax,8), %xmm0 +; CHECK: movsd %xmm0, 40(%rdx,%rax,8) +; CHECK: incq %rax +; CHECK: cmpq $5005, %rax +; CHECK: jne + +define void @count_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C) nounwind { +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop ] + %i5 = add i64 %i, 5 + %Ai = getelementptr double* %A, i64 %i5 + %t2 = load double* %Ai + %Bi = getelementptr double* %B, i64 %i5 + %t4 = load double* %Bi + %t5 = fadd double %t2, %t4 + %Ci = getelementptr double* %C, i64 %i5 + store double %t5, double* %Ci + %i10 = add i64 %i, 10 + %Ai10 = getelementptr double* %A, i64 %i10 + %t9 = load double* %Ai10 + %Bi10 = getelementptr double* %B, i64 %i10 + %t11 = load double* %Bi10 + %t12 = fadd double %t9, %t11 + %Ci10 = getelementptr double* %C, i64 %i10 + store double %t12, double* %Ci10 + %i.next = add i64 %i, 1 + %exitcond = icmp eq i64 %i.next, 5000 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll index bc493bd8f7..7111d687ed 100644 --- a/test/CodeGen/X86/masked-iv-safe.ll +++ b/test/CodeGen/X86/masked-iv-safe.ll @@ -4,9 +4,9 @@ ; RUN: not grep sar %t ; RUN: not grep shl %t ; RUN: grep add %t | count 2 -; RUN: grep inc %t | count 4 +; RUN: grep inc %t | count 3 ; RUN: grep dec %t | count 2 -; RUN: grep lea %t | count 2 +; RUN: grep lea %t | count 3 ; Optimize away zext-inreg and sext-inreg on the loop induction ; variable using trip-count information. @@ -127,6 +127,9 @@ return: ret void } +; TODO: If we could handle all the loads and stores as post-inc users, we could +; use {-1,+,1} in the induction variable register, and we'd get another inc, +; one fewer add, and a comparison with zero. define void @another_count_up(double* %d, i64 %n) nounwind { entry: br label %loop diff --git a/test/CodeGen/X86/pr3495-2.ll b/test/CodeGen/X86/pr3495-2.ll index 1372a1522b..71aa5a0488 100644 --- a/test/CodeGen/X86/pr3495-2.ll +++ b/test/CodeGen/X86/pr3495-2.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of reloads omited} +target datalayout = "e-p:32:32:32" target triple = "i386-apple-darwin9.6" %struct.constraintVCGType = type { i32, i32, i32, i32 } %struct.nodeVCGType = type { %struct.constraintVCGType*, i32, i32, i32, %struct.constraintVCGType*, i32, i32, i32 } diff --git a/test/CodeGen/X86/remat-mov-0.ll b/test/CodeGen/X86/remat-mov-0.ll index c4f768ca52..5fb445c935 100644 --- a/test/CodeGen/X86/remat-mov-0.ll +++ b/test/CodeGen/X86/remat-mov-0.ll @@ -1,13 +1,33 @@ -; RUN: llc < %s -march=x86-64 | grep {xorl %edi, %edi} | count 4 +; RUN: llc < %s -march=x86-64 | FileCheck %s ; CodeGen should remat the zero instead of spilling it. declare void @foo(i64 %p) +; CHECK: bar: +; CHECK: xorl %edi, %edi +; CHECK: xorl %edi, %edi define void @bar() nounwind { call void @foo(i64 0) call void @foo(i64 0) - call void @foo(i64 0) - call void @foo(i64 0) ret void } + +; CHECK: bat: +; CHECK: movq $-1, %rdi +; CHECK: movq $-1, %rdi +define void @bat() nounwind { + call void @foo(i64 -1) + call void @foo(i64 -1) + ret void +} + +; CHECK: bau: +; CHECK: movl $1, %edi +; CHECK: movl $1, %edi +define void @bau() nounwind { + call void @foo(i64 1) + call void @foo(i64 1) + ret void +} + diff --git a/test/CodeGen/X86/remat-mov-1.ll b/test/CodeGen/X86/remat-mov-1.ll deleted file mode 100644 index d71b7a5b91..0000000000 --- a/test/CodeGen/X86/remat-mov-1.ll +++ /dev/null @@ -1,40 +0,0 @@ -; RUN: llc < %s -march=x86 | grep -- -1 | grep mov | count 2 - - %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } - %struct.ImgT = type { i8, i8*, i8*, %struct.FILE*, i32, i32, i32, i32, i8*, double*, float*, float*, float*, i32*, double, double, i32*, double*, i32*, i32* } - %struct._CompT = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, i8, %struct._PixT*, %struct._CompT*, i8, %struct._CompT* } - %struct._PixT = type { i32, i32, %struct._PixT* } - %struct.__sFILEX = type opaque - %struct.__sbuf = type { i8*, i32 } - -declare fastcc void @MergeComponents(%struct._CompT*, %struct._CompT*, %struct._CompT*, %struct._CompT**, %struct.ImgT*) nounwind - -define fastcc void @MergeToLeft(%struct._CompT* %comp, %struct._CompT** %head, %struct.ImgT* %img) nounwind { -entry: - br label %bb208 - -bb105: ; preds = %bb200 - br i1 false, label %bb197, label %bb149 - -bb149: ; preds = %bb105 - %tmp151 = getelementptr %struct._CompT* %comp, i32 0, i32 0 ; <i32*> [#uses=1] - br label %bb193 - -bb193: ; preds = %bb184, %bb149 - %tmp196 = load i32* %tmp151, align 4 ; <i32> [#uses=1] - br label %bb197 - -bb197: ; preds = %bb193, %bb105 - %last_comp.0 = phi i32 [ %tmp196, %bb193 ], [ 0, %bb105 ] ; <i32> [#uses=0] - %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] - br label %bb200 - -bb200: ; preds = %bb208, %bb197 - %indvar = phi i32 [ 0, %bb208 ], [ %indvar.next, %bb197 ] ; <i32> [#uses=2] - %xm.0 = sub i32 %indvar, 0 ; <i32> [#uses=1] - %tmp202 = icmp slt i32 %xm.0, 1 ; <i1> [#uses=1] - br i1 %tmp202, label %bb105, label %bb208 - -bb208: ; preds = %bb200, %entry - br label %bb200 -} diff --git a/test/CodeGen/X86/subreg-to-reg-5.ll b/test/CodeGen/X86/subreg-to-reg-5.ll deleted file mode 100644 index ba4c307d10..0000000000 --- a/test/CodeGen/X86/subreg-to-reg-5.ll +++ /dev/null @@ -1,35 +0,0 @@ -; RUN: llc < %s -march=x86-64 > %t -; RUN: grep addl %t -; RUN: not egrep {movl|movq} %t - -define float @foo(float* %B) nounwind { -entry: - br label %bb2 - -bb2: ; preds = %bb3, %entry - %B_addr.0.rec = phi i64 [ %indvar.next154, %bb3 ], [ 0, %entry ] ; <i64> [#uses=2] - %z = icmp slt i64 %B_addr.0.rec, 20000 - br i1 %z, label %bb3, label %bb4 - -bb3: ; preds = %bb2 - %indvar.next154 = add i64 %B_addr.0.rec, 1 ; <i64> [#uses=1] - br label %bb2 - -bb4: ; preds = %bb2 - %B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec ; <float*> [#uses=1] - %t1 = ptrtoint float* %B_addr.0 to i64 ; <i64> [#uses=1] - %t2 = and i64 %t1, 4294967295 ; <i64> [#uses=1] - %t3 = icmp eq i64 %t2, 0 ; <i1> [#uses=1] - br i1 %t3, label %bb5, label %bb10.preheader - -bb10.preheader: ; preds = %bb4 - br label %bb9 - -bb5: ; preds = %bb4 - ret float 7.0 - -bb9: ; preds = %bb10.preheader - %t5 = getelementptr float* %B, i64 0 ; <float*> [#uses=1] - %t7 = load float* %t5 ; <float> [#uses=1] - ret float %t7 -} |