diff options
Diffstat (limited to 'test/Transforms/LoopVectorize')
98 files changed, 945 insertions, 174 deletions
diff --git a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll index 1e1396f800..a292afbd58 100644 --- a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll +++ b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll index aa7cc0ee32..b3eae69042 100644 --- a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll +++ b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce ; Check that we don't fall into an infinite loop. define void @test() nounwind { diff --git a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll index ae9f998fee..16d64eab25 100644 --- a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll +++ b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce -force-vector-unroll=1 -force-vector-width=4 +; RUN: opt < %s -loop-vectorize -dce -force-vector-interleave=1 -force-vector-width=4 ; Check that we don't crash. diff --git a/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll b/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll new file mode 100644 index 0000000000..a01d54312a --- /dev/null +++ b/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll @@ -0,0 +1,31 @@ +; RUN: opt < %s -loop-vectorize -mtriple=aarch64-unknown-linux-gnu -mcpu=cortex-a57 -S | FileCheck %s +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +%struct.anon = type { [100 x i32], i32, [100 x i32] } + +@Foo = common global %struct.anon zeroinitializer, align 4 + +; CHECK-LABEL: @foo( +; CHECK: load <4 x i32>* +; CHECK: sdiv <4 x i32> +; CHECK: store <4 x i32> + +define void @foo(){ +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %div = sdiv i32 %0, 2 + %arrayidx2 = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv + store i32 %div, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 100 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/test/Transforms/LoopVectorize/X86/already-vectorized.ll index fce3b70f59..9c69ba87f3 100644 --- a/test/Transforms/LoopVectorize/X86/already-vectorized.ll +++ b/test/Transforms/LoopVectorize/X86/already-vectorized.ll @@ -41,6 +41,6 @@ for.end: ; preds = %for.body ; Now, we check for the Hint metadata ; CHECK: [[vect]] = metadata !{metadata [[vect]], metadata [[width:![0-9]+]], metadata [[unroll:![0-9]+]]} ; CHECK: [[width]] = metadata !{metadata !"llvm.loop.vectorize.width", i32 1} -; CHECK: [[unroll]] = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1} +; CHECK: [[unroll]] = metadata !{metadata !"llvm.loop.interleave.count", i32 1} ; CHECK: [[scalar]] = metadata !{metadata [[scalar]], metadata [[width]], metadata [[unroll]]} diff --git a/test/Transforms/LoopVectorize/X86/assume.ll b/test/Transforms/LoopVectorize/X86/assume.ll new file mode 100644 index 0000000000..a94e24dd7e --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/assume.ll @@ -0,0 +1,100 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @test1(float* noalias nocapture %a, float* noalias nocapture readonly %b) #0 { +entry: + br label %for.body + +; CHECK-LABEL: @test1 +; CHECK: vector.body: +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: for.body: +; CHECK: ret void + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp1 = fcmp ogt float %0, 1.000000e+02 + tail call void @llvm.assume(i1 %cmp1) + %add = fadd float %0, 1.000000e+00 + %arrayidx5 = getelementptr inbounds float* %a, i64 %indvars.iv + store float %add, float* %arrayidx5, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv, 1599 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; Function Attrs: nounwind +declare void @llvm.assume(i1) #1 + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } + +%struct.data = type { float*, float* } + +; Function Attrs: nounwind uwtable +define void @test2(%struct.data* nocapture readonly %d) #0 { +entry: + %b = getelementptr inbounds %struct.data* %d, i64 0, i32 1 + %0 = load float** %b, align 8 + %ptrint = ptrtoint float* %0 to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + %a = getelementptr inbounds %struct.data* %d, i64 0, i32 0 + %1 = load float** %a, align 8 + %ptrint2 = ptrtoint float* %1 to i64 + %maskedptr3 = and i64 %ptrint2, 31 + %maskcond4 = icmp eq i64 %maskedptr3, 0 + br label %for.body + +; CHECK-LABEL: @test2 +; CHECK: vector.body: +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: for.body: +; CHECK: ret void + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + tail call void @llvm.assume(i1 %maskcond) + %arrayidx = getelementptr inbounds float* %0, i64 %indvars.iv + %2 = load float* %arrayidx, align 4 + %add = fadd float %2, 1.000000e+00 + tail call void @llvm.assume(i1 %maskcond4) + %arrayidx5 = getelementptr inbounds float* %1, i64 %indvars.iv + store float %add, float* %arrayidx5, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv, 1599 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + diff --git a/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/test/Transforms/LoopVectorize/X86/gcc-examples.ll index e1113fdd91..05403cd5b6 100644 --- a/test/Transforms/LoopVectorize/X86/gcc-examples.ll +++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -S | FileCheck %s -; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-unroll=0 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-interleave=0 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll index d6120e76cc..0650d9476e 100644 --- a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll +++ b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll index 8716cff777..fd69dc4607 100644 --- a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll +++ b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -vectorizer-min-trip-count=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -vectorizer-min-trip-count=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll index 2c47fcb4d3..0b542a9ef7 100644 --- a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll +++ b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/test/Transforms/LoopVectorize/X86/parallel-loops.ll index 7e156a9eda..b580d738ff 100644 --- a/test/Transforms/LoopVectorize/X86/parallel-loops.ll +++ b/test/Transforms/LoopVectorize/X86/parallel-loops.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LoopVectorize/X86/powof2div.ll b/test/Transforms/LoopVectorize/X86/powof2div.ll new file mode 100644 index 0000000000..054da8ed20 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/powof2div.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.anon = type { [100 x i32], i32, [100 x i32] } + +@Foo = common global %struct.anon zeroinitializer, align 4 + +;CHECK-LABEL: @foo( +;CHECK: load <4 x i32>* +;CHECK: sdiv <4 x i32> +;CHECK: store <4 x i32> + +define void @foo(){ +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %div = sdiv i32 %0, 2 + %arrayidx2 = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv + store i32 %div, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 100 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + diff --git a/test/Transforms/LoopVectorize/X86/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll index dfa4faaf09..f9a028164d 100644 --- a/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/test/Transforms/LoopVectorize/X86/small-size.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -loop-vectorize-with-block-frequency -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -loop-vectorize-with-block-frequency -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/X86/tripcount.ll b/test/Transforms/LoopVectorize/X86/tripcount.ll index 6b38bacf88..a4ec6948a2 100644 --- a/test/Transforms/LoopVectorize/X86/tripcount.ll +++ b/test/Transforms/LoopVectorize/X86/tripcount.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -mcpu=prescott < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -mcpu=prescott < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" target triple = "i386-unknown-freebsd11.0" diff --git a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll index d5024bb132..716dc08a55 100644 --- a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll +++ b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll @@ -1,6 +1,6 @@ -; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S \ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-interleave=0 -dce -S \ ; RUN: | FileCheck %s --check-prefix=CHECK-VECTOR -; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=1 -force-vector-unroll=0 -dce -S \ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=1 -force-vector-interleave=0 -dce -S \ ; RUN: | FileCheck %s --check-prefix=CHECK-SCALAR target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/X86/unroll_selection.ll b/test/Transforms/LoopVectorize/X86/unroll_selection.ll index 2d7b663804..c684b4e79b 100644 --- a/test/Transforms/LoopVectorize/X86/unroll_selection.ll +++ b/test/Transforms/LoopVectorize/X86/unroll_selection.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-interleave=0 -dce -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll index 59bb8d0054..e57cfefec0 100644 --- a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll +++ b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-vectorize -mcpu=corei7-avx -debug -S < %s 2>&1 | FileCheck %s +; RUN: opt -basicaa -loop-vectorize -mcpu=corei7-avx -debug -S < %s 2>&1 | FileCheck %s ; REQUIRES: asserts target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll index 6cdd29bda2..7bce11d32e 100644 --- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll +++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll @@ -24,10 +24,11 @@ ; File, line, and column should match those specified in the metadata ; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations -; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization was not specified +; CHECK: remark: source.cpp:4:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info ; CHECK: remark: source.cpp:13:5: loop not vectorized: vector width and interleave count are explicitly set to 1 ; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds -; CHECK: remark: source.cpp:19:5: loop not vectorized: vectorization is explicitly enabled +; CHECK: remark: source.cpp:19:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info +; CHECK: warning: source.cpp:19:5: loop not vectorized: failed explicitly specified loop vectorization ; CHECK: _Z4testPii ; CHECK-NOT: x i32> @@ -121,40 +122,40 @@ attributes #0 = { nounwind } !llvm.module.flags = !{!9, !10} !llvm.ident = !{!11} -!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} +!0 = metadata !{metadata !"0x11\004\00clang version 3.5.0\001\00\006\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [./source.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"source.cpp", metadata !"."} !2 = metadata !{} !3 = metadata !{metadata !4, metadata !7, metadata !8} -!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 1} -!5 = metadata !{i32 786473, metadata !1} -!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} -!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_disabled", metadata !"test_disabled", metadata !"", i32 10, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z13test_disabledPii, null, null, metadata !2, i32 10} -!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_array_bounds", metadata !"test_array_bounds", metadata !"", i32 16, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, metadata !2, i32 16} +!4 = metadata !{metadata !"0x2e\00test\00test\00\001\000\001\000\006\00256\001\001", metadata !1, metadata !5, metadata !6, null, void (i32*, i32)* @_Z4testPii, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [def] [test] +!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ] [./source.cpp] +!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !"0x2e\00test_disabled\00test_disabled\00\0010\000\001\000\006\00256\001\0010", metadata !1, metadata !5, metadata !6, null, void (i32*, i32)* @_Z13test_disabledPii, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 10] [def] [test_disabled] +!8 = metadata !{metadata !"0x2e\00test_array_bounds\00test_array_bounds\00\0016\000\001\000\006\00256\001\0016", metadata !1, metadata !5, metadata !6, null, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 16] [def] [test_array_bounds] !9 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} -!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 2} !11 = metadata !{metadata !"clang version 3.5.0"} !12 = metadata !{i32 3, i32 8, metadata !13, null} -!13 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0} +!13 = metadata !{metadata !"0xb\003\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] !14 = metadata !{metadata !14, metadata !15, metadata !15} !15 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} !16 = metadata !{i32 4, i32 5, metadata !17, null} -!17 = metadata !{i32 786443, metadata !1, metadata !13, i32 3, i32 36, i32 0, i32 1} +!17 = metadata !{metadata !"0xb\003\0036\000", metadata !1, metadata !13} ; [ DW_TAG_lexical_block ] !18 = metadata !{metadata !19, metadata !19, i64 0} !19 = metadata !{metadata !"int", metadata !20, i64 0} !20 = metadata !{metadata !"omnipotent char", metadata !21, i64 0} !21 = metadata !{metadata !"Simple C/C++ TBAA"} !22 = metadata !{i32 5, i32 9, metadata !23, null} -!23 = metadata !{i32 786443, metadata !1, metadata !17, i32 5, i32 9, i32 0, i32 2} +!23 = metadata !{metadata !"0xb\005\009\000", metadata !1, metadata !17} ; [ DW_TAG_lexical_block ] !24 = metadata !{i32 8, i32 1, metadata !4, null} !25 = metadata !{i32 12, i32 8, metadata !26, null} -!26 = metadata !{i32 786443, metadata !1, metadata !7, i32 12, i32 3, i32 0, i32 3} +!26 = metadata !{metadata !"0xb\0012\003\000", metadata !1, metadata !7} ; [ DW_TAG_lexical_block ] !27 = metadata !{metadata !27, metadata !28, metadata !29} -!28 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1} +!28 = metadata !{metadata !"llvm.loop.interleave.count", i32 1} !29 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1} !30 = metadata !{i32 13, i32 5, metadata !26, null} !31 = metadata !{i32 14, i32 1, metadata !7, null} !32 = metadata !{i32 18, i32 8, metadata !33, null} -!33 = metadata !{i32 786443, metadata !1, metadata !8, i32 18, i32 3, i32 0, i32 4} +!33 = metadata !{metadata !"0xb\0018\003\000", metadata !1, metadata !8} ; [ DW_TAG_lexical_block ] !34 = metadata !{metadata !34, metadata !15} !35 = metadata !{i32 19, i32 5, metadata !33, null} !36 = metadata !{i32 20, i32 1, metadata !8, null} diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll index f6834477ff..14e541a56e 100644 --- a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll +++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=VECTORIZED %s -; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s -; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s ; This code has all the !dbg annotations needed to track source line information, ; but is missing the llvm.dbg.cu annotation. This prevents code generation from @@ -52,23 +52,23 @@ declare void @ibar(i32*) #1 !1 = metadata !{metadata !"vectorization-remarks.c", metadata !"."} !2 = metadata !{} !3 = metadata !{metadata !4} -!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @foo, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [foo] -!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [./vectorization-remarks.c] -!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!4 = metadata !{metadata !"0x2e\00foo\00foo\00\005\000\001\000\006\00256\001\006", metadata !1, metadata !5, metadata !6, null, i32 (i32)* @foo, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [foo] +!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ] [./vectorization-remarks.c] +!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} -!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 2} !9 = metadata !{metadata !"clang version 3.5.0 "} -!10 = metadata !{i32 8, i32 3, metadata !4, null} ; [ DW_TAG_imported_declaration ] +!10 = metadata !{i32 8, i32 3, metadata !4, null} !11 = metadata !{metadata !12, metadata !12, i64 0} !12 = metadata !{metadata !"int", metadata !13, i64 0} !13 = metadata !{metadata !"omnipotent char", metadata !14, i64 0} !14 = metadata !{metadata !"Simple C/C++ TBAA"} !15 = metadata !{i32 17, i32 8, metadata !16, null} -!16 = metadata !{i32 786443, metadata !1, metadata !17, i32 17, i32 8, i32 2, i32 3} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] -!17 = metadata !{i32 786443, metadata !1, metadata !18, i32 17, i32 8, i32 1, i32 2} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] -!18 = metadata !{i32 786443, metadata !1, metadata !4, i32 17, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] +!16 = metadata !{metadata !"0xb\0017\008\002", metadata !1, metadata !17} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] +!17 = metadata !{metadata !"0xb\0017\008\001", metadata !1, metadata !18} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] +!18 = metadata !{metadata !"0xb\0017\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] !19 = metadata !{i32 18, i32 5, metadata !20, null} -!20 = metadata !{i32 786443, metadata !1, metadata !18, i32 17, i32 27, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] +!20 = metadata !{metadata !"0xb\0017\0027\000", metadata !1, metadata !18} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] !21 = metadata !{metadata !13, metadata !13, i64 0} !22 = metadata !{i32 20, i32 3, metadata !4, null} !23 = metadata !{i32 21, i32 3, metadata !4, null} diff --git a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll index efc93d94a7..d8e5403d2d 100644 --- a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll +++ b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll @@ -1,4 +1,4 @@ -; RUN: opt -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S < %s | FileCheck %s +; RUN: opt -O3 -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.7.0" diff --git a/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll b/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll index a099daa740..cab333d064 100644 --- a/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll +++ b/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -S -mtriple=xcore | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S -mtriple=xcore | FileCheck %s target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32" target triple = "xcore" diff --git a/test/Transforms/LoopVectorize/align.ll b/test/Transforms/LoopVectorize/align.ll index 84b03615d3..f2fb8b91b4 100644 --- a/test/Transforms/LoopVectorize/align.ll +++ b/test/Transforms/LoopVectorize/align.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/bsd_regex.ll b/test/Transforms/LoopVectorize/bsd_regex.ll index 7b712729a1..7a3e79893d 100644 --- a/test/Transforms/LoopVectorize/bsd_regex.ll +++ b/test/Transforms/LoopVectorize/bsd_regex.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=2 < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-interleave=2 < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll index 2648bbea80..d7cbad06dd 100644 --- a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll +++ b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/calloc.ll b/test/Transforms/LoopVectorize/calloc.ll index 7e79916164..5f441f3e23 100644 --- a/test/Transforms/LoopVectorize/calloc.ll +++ b/test/Transforms/LoopVectorize/calloc.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/test/Transforms/LoopVectorize/cast-induction.ll b/test/Transforms/LoopVectorize/cast-induction.ll index 255ce9c77e..4f92d33ff5 100644 --- a/test/Transforms/LoopVectorize/cast-induction.ll +++ b/test/Transforms/LoopVectorize/cast-induction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s ; rdar://problem/12848162 diff --git a/test/Transforms/LoopVectorize/conditional-assignment.ll b/test/Transforms/LoopVectorize/conditional-assignment.ll new file mode 100644 index 0000000000..50fa329b98 --- /dev/null +++ b/test/Transforms/LoopVectorize/conditional-assignment.ll @@ -0,0 +1,58 @@ +; RUN: opt < %s -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s + +; CHECK: remark: source.c:2:8: loop not vectorized: store that is conditionally executed prevents vectorization + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; Function Attrs: nounwind ssp uwtable +define void @conditional_store(i32* noalias nocapture %indices) #0 { +entry: + br label %for.body, !dbg !10 + +for.body: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] + %arrayidx = getelementptr inbounds i32* %indices, i64 %indvars.iv, !dbg !12 + %0 = load i32* %arrayidx, align 4, !dbg !12, !tbaa !14 + %cmp1 = icmp eq i32 %0, 1024, !dbg !12 + br i1 %cmp1, label %if.then, label %for.inc, !dbg !12 + +if.then: ; preds = %for.body + store i32 0, i32* %arrayidx, align 4, !dbg !18, !tbaa !14 + br label %for.inc, !dbg !18 + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10 + %exitcond = icmp eq i64 %indvars.iv.next, 4096, !dbg !10 + br i1 %exitcond, label %for.end, label %for.body, !dbg !10 + +for.end: ; preds = %for.inc + ret void, !dbg !19 +} + +attributes #0 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = metadata !{metadata !"0x11\0012\00clang version 3.6.0\001\00\000\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] +!1 = metadata !{metadata !"source.c", metadata !"."} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{metadata !"0x2e\00conditional_store\00conditional_store\00\001\000\001\000\006\00256\001\001", metadata !1, metadata !5, metadata !6, null, void (i32*)* @conditional_store, null, null, metadata !2} ; [ DW_TAG_subprogram ] +!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ] +!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 2} +!9 = metadata !{metadata !"clang version 3.6.0"} +!10 = metadata !{i32 2, i32 8, metadata !11, null} +!11 = metadata !{metadata !"0xb\002\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] +!12 = metadata !{i32 3, i32 9, metadata !13, null} +!13 = metadata !{metadata !"0xb\003\009\000", metadata !1, metadata !11} ; [ DW_TAG_lexical_block ] +!14 = metadata !{metadata !15, metadata !15, i64 0} +!15 = metadata !{metadata !"int", metadata !16, i64 0} +!16 = metadata !{metadata !"omnipotent char", metadata !17, i64 0} +!17 = metadata !{metadata !"Simple C/C++ TBAA"} +!18 = metadata !{i32 3, i32 29, metadata !13, null} +!19 = metadata !{i32 4, i32 1, metadata !4, null} diff --git a/test/Transforms/LoopVectorize/control-flow.ll b/test/Transforms/LoopVectorize/control-flow.ll index e4ba77fa3d..452b7ae823 100644 --- a/test/Transforms/LoopVectorize/control-flow.ll +++ b/test/Transforms/LoopVectorize/control-flow.ll @@ -11,7 +11,7 @@ ; } ; CHECK: remark: source.cpp:5:9: loop not vectorized: loop control flow is not understood by vectorizer -; CHECK: remark: source.cpp:5:9: loop not vectorized: vectorization was not specified +; CHECK: remark: source.cpp:5:9: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info ; CHECK: _Z4testPii ; CHECK-NOT: x i32> @@ -55,21 +55,21 @@ attributes #0 = { nounwind } !llvm.module.flags = !{!7, !8} !llvm.ident = !{!9} -!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} +!0 = metadata !{metadata !"0x11\004\00clang version 3.5.0\001\00\006\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [./source.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"source.cpp", metadata !"."} !2 = metadata !{} !3 = metadata !{metadata !4} -!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 2} -!5 = metadata !{i32 786473, metadata !1} -!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} +!4 = metadata !{metadata !"0x2e\00test\00test\00\001\000\001\000\006\00256\001\002", metadata !1, metadata !5, metadata !6, null, i32 (i32*, i32)* @_Z4testPii, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [test] +!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ] [./source.cpp] +!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} -!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 2} !9 = metadata !{metadata !"clang version 3.5.0"} !10 = metadata !{i32 3, i32 8, metadata !11, null} -!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0} +!11 = metadata !{metadata !"0xb\003\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] !12 = metadata !{i32 5, i32 9, metadata !13, null} -!13 = metadata !{i32 786443, metadata !1, metadata !14, i32 5, i32 9, i32 0, i32 2} -!14 = metadata !{i32 786443, metadata !1, metadata !11, i32 4, i32 3, i32 0, i32 1} +!13 = metadata !{metadata !"0xb\005\009\000", metadata !1, metadata !14} ; [ DW_TAG_lexical_block ] +!14 = metadata !{metadata !"0xb\004\003\000", metadata !1, metadata !11} ; [ DW_TAG_lexical_block ] !15 = metadata !{metadata !16, metadata !16, i64 0} !16 = metadata !{metadata !"int", metadata !17, i64 0} !17 = metadata !{metadata !"omnipotent char", metadata !18, i64 0} diff --git a/test/Transforms/LoopVectorize/cpp-new-array.ll b/test/Transforms/LoopVectorize/cpp-new-array.ll index c8215a107d..f32f610b1c 100644 --- a/test/Transforms/LoopVectorize/cpp-new-array.ll +++ b/test/Transforms/LoopVectorize/cpp-new-array.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/dbg.value.ll b/test/Transforms/LoopVectorize/dbg.value.ll index 2497b25ea1..91d07d409e 100644 --- a/test/Transforms/LoopVectorize/dbg.value.ll +++ b/test/Transforms/LoopVectorize/dbg.value.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine | FileCheck %s +; RUN: opt < %s -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine | FileCheck %s ; Make sure we vectorize with debugging turned on. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -11,7 +11,7 @@ target triple = "x86_64-apple-macosx10.8.0" ; CHECK-LABEL: @test( define i32 @test() #0 { entry: - tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !9), !dbg !18 + tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !9, metadata !{}), !dbg !18 br label %for.body, !dbg !18 for.body: @@ -25,7 +25,7 @@ for.body: %arrayidx4 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv, !dbg !19 store i32 %add, i32* %arrayidx4, align 4, !dbg !19 %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !18 - tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !9), !dbg !18 + tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !9, metadata !{}), !dbg !18 %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !18 %exitcond = icmp ne i32 %lftr.wideiv, 1024, !dbg !18 br i1 %exitcond, label %for.body, label %for.end, !dbg !18 @@ -34,9 +34,9 @@ for.end: ret i32 0, !dbg !24 } -declare void @llvm.dbg.declare(metadata, metadata) #1 +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 -declare void @llvm.dbg.value(metadata, i64, metadata) #1 +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "relocation-model"="pic" "ssp-buffers-size"="8" } attributes #1 = { nounwind readnone } @@ -44,27 +44,27 @@ attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!26} -!0 = metadata !{i32 786449, metadata !25, i32 4, metadata !"clang", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !11, null, metadata !""} +!0 = metadata !{metadata !"0x11\004\00clang\001\00\000\00\000", metadata !25, metadata !1, metadata !1, metadata !2, metadata !11, null} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !2 = metadata !{metadata !3} -!3 = metadata !{i32 786478, metadata !25, metadata !4, metadata !"test", metadata !"test", metadata !"test", i32 5, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @test, null, null, metadata !8, i32 5} -!4 = metadata !{i32 786473, metadata !25} -!5 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!3 = metadata !{metadata !"0x2e\00test\00test\00test\005\000\001\000\006\00256\001\005", metadata !25, metadata !4, metadata !5, null, i32 ()* @test, null, null, metadata !8} ; [ DW_TAG_subprogram ] +!4 = metadata !{metadata !"0x29", metadata !25} ; [ DW_TAG_file_type ] +!5 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !6, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !6 = metadata !{metadata !7} -!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} +!7 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] !8 = metadata !{metadata !9} -!9 = metadata !{i32 786688, metadata !10, metadata !"i", metadata !4, i32 6, metadata !7, i32 0, i32 0} -!10 = metadata !{i32 786443, metadata !25, metadata !3, i32 6, i32 0, i32 0} +!9 = metadata !{metadata !"0x100\00i\006\000", metadata !10, metadata !4, metadata !7} ; [ DW_TAG_auto_variable ] +!10 = metadata !{metadata !"0xb\006\000\000", metadata !25, metadata !3} ; [ DW_TAG_lexical_block ] !11 = metadata !{metadata !12, metadata !16, metadata !17} -!12 = metadata !{i32 786484, i32 0, null, metadata !"A", metadata !"A", metadata !"", metadata !4, i32 1, metadata !13, i32 0, i32 1, [1024 x i32]* @A, null} -!13 = metadata !{i32 786433, null, null, null, i32 0, i64 32768, i64 32, i32 0, i32 0, metadata !7, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32768, align 32, offset 0] [from int] +!12 = metadata !{metadata !"0x34\00A\00A\00\001\000\001", null, metadata !4, metadata !13, [1024 x i32]* @A, null} ; [ DW_TAG_variable ] +!13 = metadata !{metadata !"0x1\00\000\0032768\0032\000\000", null, null, metadata !7, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32768, align 32, offset 0] [from int] !14 = metadata !{metadata !15} !15 = metadata !{i32 786465, i64 0, i64 1024} -!16 = metadata !{i32 786484, i32 0, null, metadata !"B", metadata !"B", metadata !"", metadata !4, i32 2, metadata !13, i32 0, i32 1, [1024 x i32]* @B, null} -!17 = metadata !{i32 786484, i32 0, null, metadata !"C", metadata !"C", metadata !"", metadata !4, i32 3, metadata !13, i32 0, i32 1, [1024 x i32]* @C, null} +!16 = metadata !{metadata !"0x34\00B\00B\00\002\000\001", null, metadata !4, metadata !13, [1024 x i32]* @B, null} ; [ DW_TAG_variable ] +!17 = metadata !{metadata !"0x34\00C\00C\00\003\000\001", null, metadata !4, metadata !13, [1024 x i32]* @C, null} ; [ DW_TAG_variable ] !18 = metadata !{i32 6, i32 0, metadata !10, null} !19 = metadata !{i32 7, i32 0, metadata !20, null} -!20 = metadata !{i32 786443, metadata !25, metadata !10, i32 6, i32 0, i32 1} +!20 = metadata !{metadata !"0xb\006\000\001", metadata !25, metadata !10} ; [ DW_TAG_lexical_block ] !24 = metadata !{i32 9, i32 0, metadata !3, null} !25 = metadata !{metadata !"test", metadata !"/path/to/somewhere"} -!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 2} diff --git a/test/Transforms/LoopVectorize/debugloc.ll b/test/Transforms/LoopVectorize/debugloc.ll index bf0b4184b7..6350296a7d 100644 --- a/test/Transforms/LoopVectorize/debugloc.ll +++ b/test/Transforms/LoopVectorize/debugloc.ll @@ -1,4 +1,4 @@ -; RUN: opt -S < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 | FileCheck %s +; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -19,10 +19,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @f(i32* nocapture %a, i32 %size) #0 { entry: - tail call void @llvm.dbg.value(metadata !{i32* %a}, i64 0, metadata !13), !dbg !19 - tail call void @llvm.dbg.value(metadata !{i32 %size}, i64 0, metadata !14), !dbg !19 - tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !15), !dbg !20 - tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !16), !dbg !21 + tail call void @llvm.dbg.value(metadata !{i32* %a}, i64 0, metadata !13, metadata !{}), !dbg !19 + tail call void @llvm.dbg.value(metadata !{i32 %size}, i64 0, metadata !14, metadata !{}), !dbg !19 + tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !15, metadata !{}), !dbg !20 + tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !16, metadata !{}), !dbg !21 %cmp4 = icmp eq i32 %size, 0, !dbg !21 br i1 %cmp4, label %for.end, label %for.body.lr.ph, !dbg !21 @@ -35,9 +35,9 @@ for.body: ; preds = %for.body.lr.ph, %fo %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv, !dbg !22 %0 = load i32* %arrayidx, align 4, !dbg !22 %add = add i32 %0, %sum.05, !dbg !22 - tail call void @llvm.dbg.value(metadata !{i32 %add.lcssa}, i64 0, metadata !15), !dbg !22 + tail call void @llvm.dbg.value(metadata !{i32 %add.lcssa}, i64 0, metadata !15, metadata !{}), !dbg !22 %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !21 - tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !16), !dbg !21 + tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !16, metadata !{}), !dbg !21 %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !21 %exitcond = icmp ne i32 %lftr.wideiv, %size, !dbg !21 br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge, !dbg !21 @@ -52,10 +52,10 @@ for.end: ; preds = %entry, %for.cond.fo } ; Function Attrs: nounwind readnone -declare void @llvm.dbg.declare(metadata, metadata) #1 +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 ; Function Attrs: nounwind readnone -declare void @llvm.dbg.value(metadata, i64, metadata) #1 +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 attributes #0 = { nounwind readonly ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" } attributes #1 = { nounwind readnone } @@ -63,28 +63,28 @@ attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!18, !27} -!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 185038) (llvm/trunk 185097)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Data/backedup/dev/os/llvm/debug/-] [DW_LANG_C99] +!0 = metadata !{metadata !"0x11\0012\00clang version 3.4 (trunk 185038) (llvm/trunk 185097)\001\00\000\00\000", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [/Volumes/Data/backedup/dev/os/llvm/debug/-] [DW_LANG_C99] !1 = metadata !{metadata !"-", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"} !2 = metadata !{i32 0} !3 = metadata !{metadata !4} -!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @f, null, null, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f] +!4 = metadata !{metadata !"0x2e\00f\00f\00\003\000\001\000\006\00256\001\003", metadata !5, metadata !6, metadata !7, null, i32 (i32*, i32)* @f, null, null, metadata !12} ; [ DW_TAG_subprogram ] [line 3] [def] [f] !5 = metadata !{metadata !"<stdin>", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"} -!6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>] -!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{metadata !"0x29", metadata !5} ; [ DW_TAG_file_type ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>] +!7 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !10, metadata !11} -!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] -!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] -!11 = metadata !{i32 786468, null, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned] +!9 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!10 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", null, null, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] +!11 = metadata !{metadata !"0x24\00unsigned int\000\0032\0032\000\000\007", null, null} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned] !12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16} -!13 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !6, i32 16777219, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 3] -!14 = metadata !{i32 786689, metadata !4, metadata !"size", metadata !6, i32 33554435, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [size] [line 3] -!15 = metadata !{i32 786688, metadata !4, metadata !"sum", metadata !6, i32 4, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [sum] [line 4] -!16 = metadata !{i32 786688, metadata !17, metadata !"i", metadata !6, i32 5, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 5] -!17 = metadata !{i32 786443, metadata !5, metadata !4, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>] +!13 = metadata !{metadata !"0x101\00a\0016777219\000", metadata !4, metadata !6, metadata !10} ; [ DW_TAG_arg_variable ] [a] [line 3] +!14 = metadata !{metadata !"0x101\00size\0033554435\000", metadata !4, metadata !6, metadata !11} ; [ DW_TAG_arg_variable ] [size] [line 3] +!15 = metadata !{metadata !"0x100\00sum\004\000", metadata !4, metadata !6, metadata !11} ; [ DW_TAG_auto_variable ] [sum] [line 4] +!16 = metadata !{metadata !"0x100\00i\005\000", metadata !17, metadata !6, metadata !11} ; [ DW_TAG_auto_variable ] [i] [line 5] +!17 = metadata !{metadata !"0xb\005\000\000", metadata !5, metadata !4} ; [ DW_TAG_lexical_block ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>] !18 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} !19 = metadata !{i32 3, i32 0, metadata !4, null} !20 = metadata !{i32 4, i32 0, metadata !4, null} !21 = metadata !{i32 5, i32 0, metadata !17, null} !22 = metadata !{i32 6, i32 0, metadata !17, null} !26 = metadata !{i32 7, i32 0, metadata !4, null} -!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 2} diff --git a/test/Transforms/LoopVectorize/duplicated-metadata.ll b/test/Transforms/LoopVectorize/duplicated-metadata.ll new file mode 100644 index 0000000000..8353dca41c --- /dev/null +++ b/test/Transforms/LoopVectorize/duplicated-metadata.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -loop-vectorize -S 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; This test makes sure we don't duplicate the loop vectorizer's metadata +; while marking them as already vectorized (by setting width = 1), even +; at lower optimization levels, where no extra cleanup is done + +define void @_Z3fooPf(float* %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv + %p = load float* %arrayidx, align 4 + %mul = fmul float %p, 2.000000e+00 + store float %mul, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + +for.end: ; preds = %for.body + ret void +} + +!0 = metadata !{metadata !0, metadata !1} +!1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 4} +; CHECK-NOT: !{metadata !"llvm.loop.vectorize.width", i32 4} +; CHECK: !{metadata !"llvm.loop.interleave.count", i32 1} diff --git a/test/Transforms/LoopVectorize/ee-crash.ll b/test/Transforms/LoopVectorize/ee-crash.ll index 8a4f8ce3c1..a3c0bb89b1 100644 --- a/test/Transforms/LoopVectorize/ee-crash.ll +++ b/test/Transforms/LoopVectorize/ee-crash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/exact.ll b/test/Transforms/LoopVectorize/exact.ll new file mode 100644 index 0000000000..0a8fbf3373 --- /dev/null +++ b/test/Transforms/LoopVectorize/exact.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; CHECK-LABEL: @lshr_exact( +; CHECK: lshr exact <4 x i32> +define void @lshr_exact(i32* %x) { +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32* %x, i64 %iv + %0 = load i32* %arrayidx, align 4 + %conv1 = lshr exact i32 %0, 1 + store i32 %conv1, i32* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 256 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/Transforms/LoopVectorize/flags.ll b/test/Transforms/LoopVectorize/flags.ll index 21d09372d5..0fc55c8973 100644 --- a/test/Transforms/LoopVectorize/flags.ll +++ b/test/Transforms/LoopVectorize/flags.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll index 0dfbab0727..0f064ee2dc 100644 --- a/test/Transforms/LoopVectorize/float-reduction.ll +++ b/test/Transforms/LoopVectorize/float-reduction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -23,3 +23,25 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret float %add } + +;CHECK-LABEL: @foosub( +;CHECK: fsub fast <4 x float> +;CHECK: ret +define float @foosub(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %sum.04 = phi float [ 0.000000e+00, %entry ], [ %sub, %for.body ] + %arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %sub = fsub fast float %sum.04, %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 200 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret float %sub +} diff --git a/test/Transforms/LoopVectorize/funcall.ll b/test/Transforms/LoopVectorize/funcall.ll index f1f068c43d..e03534fbac 100644 --- a/test/Transforms/LoopVectorize/funcall.ll +++ b/test/Transforms/LoopVectorize/funcall.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll index d8959d4c10..6c8af0bcd7 100644 --- a/test/Transforms/LoopVectorize/gcc-examples.ll +++ b/test/Transforms/LoopVectorize/gcc-examples.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine -S | FileCheck %s -; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=4 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL +; RUN: opt < %s -basicaa -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-vectorize -force-vector-width=4 -force-vector-interleave=4 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll index d64d67f6a5..3f11ce8bf2 100644 --- a/test/Transforms/LoopVectorize/global_alias.ll +++ b/test/Transforms/LoopVectorize/global_alias.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -O1 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -O1 -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" diff --git a/test/Transforms/LoopVectorize/hoist-loads.ll b/test/Transforms/LoopVectorize/hoist-loads.ll index 765e14d698..d0b27f1eae 100644 --- a/test/Transforms/LoopVectorize/hoist-loads.ll +++ b/test/Transforms/LoopVectorize/hoist-loads.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s +; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/i8-induction.ll b/test/Transforms/LoopVectorize/i8-induction.ll index 2a0e826454..90e3ec00cd 100644 --- a/test/Transforms/LoopVectorize/i8-induction.ll +++ b/test/Transforms/LoopVectorize/i8-induction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/if-conv-crash.ll b/test/Transforms/LoopVectorize/if-conv-crash.ll index f8f2cf1fff..67910bf6d5 100644 --- a/test/Transforms/LoopVectorize/if-conv-crash.ll +++ b/test/Transforms/LoopVectorize/if-conv-crash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/if-conversion-nest.ll b/test/Transforms/LoopVectorize/if-conversion-nest.ll index 92cb06e5e5..b5ac8fca23 100644 --- a/test/Transforms/LoopVectorize/if-conversion-nest.ll +++ b/test/Transforms/LoopVectorize/if-conversion-nest.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/if-conversion-reduction.ll b/test/Transforms/LoopVectorize/if-conversion-reduction.ll index 8cb703cdfa..455699c780 100644 --- a/test/Transforms/LoopVectorize/if-conversion-reduction.ll +++ b/test/Transforms/LoopVectorize/if-conversion-reduction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll index 6e3e8ed278..9e18528491 100644 --- a/test/Transforms/LoopVectorize/if-conversion.ll +++ b/test/Transforms/LoopVectorize/if-conversion.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/test/Transforms/LoopVectorize/if-pred-stores.ll b/test/Transforms/LoopVectorize/if-pred-stores.ll index 7b0e181c84..c6067e01ee 100644 --- a/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-unroll=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL -; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-unroll=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC +; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL +; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/test/Transforms/LoopVectorize/incorrect-dom-info.ll b/test/Transforms/LoopVectorize/incorrect-dom-info.ll new file mode 100644 index 0000000000..624ee7e505 --- /dev/null +++ b/test/Transforms/LoopVectorize/incorrect-dom-info.ll @@ -0,0 +1,142 @@ +; This test is based on one of benchmarks from SPEC2006. It exposes a bug with +; incorrect updating of the dom-tree. +; RUN: opt < %s -loop-vectorize -verify-dom-info +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +@PL_utf8skip = external constant [0 x i8] + +; Function Attrs: nounwind ssp uwtable +define void @Perl_pp_quotemeta() #0 { + %len = alloca i64, align 8 + br i1 undef, label %2, label %1 + +; <label>:1 ; preds = %0 + br label %3 + +; <label>:2 ; preds = %0 + br label %3 + +; <label>:3 ; preds = %2, %1 + br i1 undef, label %34, label %4 + +; <label>:4 ; preds = %3 + br i1 undef, label %5, label %6 + +; <label>:5 ; preds = %4 + br label %6 + +; <label>:6 ; preds = %5, %4 + br i1 undef, label %7, label %8 + +; <label>:7 ; preds = %6 + br label %8 + +; <label>:8 ; preds = %7, %6 + br i1 undef, label %.preheader, label %9 + +.preheader: ; preds = %9, %8 + br i1 undef, label %.loopexit, label %.lr.ph + +; <label>:9 ; preds = %8 + br i1 undef, label %thread-pre-split.preheader, label %.preheader + +thread-pre-split.preheader: ; preds = %9 + br i1 undef, label %thread-pre-split._crit_edge, label %.lr.ph21 + +.thread-pre-split.loopexit_crit_edge: ; preds = %19 + %scevgep.sum = xor i64 %umax, -1 + %scevgep45 = getelementptr i8* %d.020, i64 %scevgep.sum + br label %thread-pre-split.loopexit + +thread-pre-split.loopexit: ; preds = %11, %.thread-pre-split.loopexit_crit_edge + %d.1.lcssa = phi i8* [ %scevgep45, %.thread-pre-split.loopexit_crit_edge ], [ %d.020, %11 ] + br i1 false, label %thread-pre-split._crit_edge, label %.lr.ph21 + +.lr.ph21: ; preds = %26, %thread-pre-split.loopexit, %thread-pre-split.preheader + %d.020 = phi i8* [ undef, %26 ], [ %d.1.lcssa, %thread-pre-split.loopexit ], [ undef, %thread-pre-split.preheader ] + %10 = phi i64 [ %28, %26 ], [ undef, %thread-pre-split.loopexit ], [ undef, %thread-pre-split.preheader ] + br i1 undef, label %11, label %22 + +; <label>:11 ; preds = %.lr.ph21 + %12 = getelementptr inbounds [0 x i8]* @PL_utf8skip, i64 0, i64 undef + %13 = load i8* %12, align 1 + %14 = zext i8 %13 to i64 + %15 = icmp ugt i64 %14, %10 + %. = select i1 %15, i64 %10, i64 %14 + br i1 undef, label %thread-pre-split.loopexit, label %.lr.ph28 + +.lr.ph28: ; preds = %11 + %16 = xor i64 %10, -1 + %17 = xor i64 %14, -1 + %18 = icmp ugt i64 %16, %17 + %umax = select i1 %18, i64 %16, i64 %17 + br label %19 + +; <label>:19 ; preds = %19, %.lr.ph28 + %ulen.126 = phi i64 [ %., %.lr.ph28 ], [ %20, %19 ] + %20 = add i64 %ulen.126, -1 + %21 = icmp eq i64 %20, 0 + br i1 %21, label %.thread-pre-split.loopexit_crit_edge, label %19 + +; <label>:22 ; preds = %.lr.ph21 + br i1 undef, label %26, label %23 + +; <label>:23 ; preds = %22 + br i1 undef, label %26, label %24 + +; <label>:24 ; preds = %23 + br i1 undef, label %26, label %25 + +; <label>:25 ; preds = %24 + br label %26 + +; <label>:26 ; preds = %25, %24, %23, %22 + %27 = load i64* %len, align 8 + %28 = add i64 %27, -1 + br i1 undef, label %thread-pre-split._crit_edge, label %.lr.ph21 + +thread-pre-split._crit_edge: ; preds = %26, %thread-pre-split.loopexit, %thread-pre-split.preheader + br label %.loopexit + +.lr.ph: ; preds = %33, %.preheader + br i1 undef, label %29, label %thread-pre-split5 + +; <label>:29 ; preds = %.lr.ph + br i1 undef, label %33, label %30 + +; <label>:30 ; preds = %29 + br i1 undef, label %33, label %31 + +thread-pre-split5: ; preds = %.lr.ph + br i1 undef, label %33, label %31 + +; <label>:31 ; preds = %thread-pre-split5, %30 + br i1 undef, label %33, label %32 + +; <label>:32 ; preds = %31 + br label %33 + +; <label>:33 ; preds = %32, %31, %thread-pre-split5, %30, %29 + br i1 undef, label %.loopexit, label %.lr.ph + +.loopexit: ; preds = %33, %thread-pre-split._crit_edge, %.preheader + br label %35 + +; <label>:34 ; preds = %3 + br label %35 + +; <label>:35 ; preds = %34, %.loopexit + br i1 undef, label %37, label %36 + +; <label>:36 ; preds = %35 + br label %37 + +; <label>:37 ; preds = %36, %35 + ret void +} + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = metadata !{metadata !"clang version 3.6.0 "} diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll index 71bedb7334..067a76b51a 100644 --- a/test/Transforms/LoopVectorize/increment.ll +++ b/test/Transforms/LoopVectorize/increment.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll index 7dabcb2ba0..3f3491835b 100644 --- a/test/Transforms/LoopVectorize/induction.ll +++ b/test/Transforms/LoopVectorize/induction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -28,7 +28,7 @@ for.end: ret void } -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND ; Make sure we remove unneeded vectorization of induction variables. ; In order for instcombine to cleanup the vectorized induction variables that we diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll index 9c8201ab78..ce64c5b620 100644 --- a/test/Transforms/LoopVectorize/induction_plus.ll +++ b/test/Transforms/LoopVectorize/induction_plus.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll index 7dfaf03b0f..d48731a074 100644 --- a/test/Transforms/LoopVectorize/intrinsic.ll +++ b/test/Transforms/LoopVectorize/intrinsic.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -1192,3 +1192,59 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %for.body, %entry ret void } + +declare float @llvm.minnum.f32(float, float) nounwind readnone + +;CHECK-LABEL: @minnum_f32( +;CHECK: llvm.minnum.v4f32 +;CHECK: ret void +define void @minnum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv + %1 = load float* %arrayidx2, align 4 + %call = tail call float @llvm.minnum.f32(float %0, float %1) nounwind readnone + %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv + store float %call, float* %arrayidx4, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare float @llvm.maxnum.f32(float, float) nounwind readnone + +;CHECK-LABEL: @maxnum_f32( +;CHECK: llvm.maxnum.v4f32 +;CHECK: ret void +define void @maxnum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv + %1 = load float* %arrayidx2, align 4 + %call = tail call float @llvm.maxnum.f32(float %0, float %1) nounwind readnone + %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv + store float %call, float* %arrayidx4, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} diff --git a/test/Transforms/LoopVectorize/lcssa-crash.ll b/test/Transforms/LoopVectorize/lcssa-crash.ll index de6be54849..68cc74ed70 100644 --- a/test/Transforms/LoopVectorize/lcssa-crash.ll +++ b/test/Transforms/LoopVectorize/lcssa-crash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LoopVectorize/lifetime.ll b/test/Transforms/LoopVectorize/lifetime.ll index 4f6f3b820a..ba36cc4f7a 100644 --- a/test/Transforms/LoopVectorize/lifetime.ll +++ b/test/Transforms/LoopVectorize/lifetime.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/memdep.ll b/test/Transforms/LoopVectorize/memdep.ll index 21cb703ba4..f857e800be 100644 --- a/test/Transforms/LoopVectorize/memdep.ll +++ b/test/Transforms/LoopVectorize/memdep.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S | FileCheck %s -; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -S | FileCheck %s -check-prefix=WIDTH +; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s -check-prefix=WIDTH target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/metadata-unroll.ll b/test/Transforms/LoopVectorize/metadata-unroll.ll index 2fcc53a315..848f1f9601 100644 --- a/test/Transforms/LoopVectorize/metadata-unroll.ll +++ b/test/Transforms/LoopVectorize/metadata-unroll.ll @@ -38,4 +38,4 @@ define void @inc(i32 %n) nounwind uwtable noinline ssp { } !0 = metadata !{metadata !0, metadata !1} -!1 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 2} +!1 = metadata !{metadata !"llvm.loop.interleave.count", i32 2} diff --git a/test/Transforms/LoopVectorize/metadata-width.ll b/test/Transforms/LoopVectorize/metadata-width.ll index 87de655da6..da0c622c03 100644 --- a/test/Transforms/LoopVectorize/metadata-width.ll +++ b/test/Transforms/LoopVectorize/metadata-width.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LoopVectorize/metadata.ll b/test/Transforms/LoopVectorize/metadata.ll new file mode 100644 index 0000000000..14f60b3c4a --- /dev/null +++ b/test/Transforms/LoopVectorize/metadata.ll @@ -0,0 +1,44 @@ +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define i32 @test1(i32* nocapture %a, float* nocapture readonly %b) #0 { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !tbaa !0 + %conv = fptosi float %0 to i32 + %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv + store i32 %conv, i32* %arrayidx2, align 4, !tbaa !4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1600 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 0 +} + +; CHECK-LABEL: @test1 +; CHECK: load <4 x float>* %{{.*}}, align 4, !tbaa ![[TFLT:[0-9]+]] +; CHECK: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa ![[TINT:[0-9]+]] +; CHECK: ret i32 0 + +; CHECK-DAG: ![[TFLT]] = metadata !{metadata ![[TFLT1:[0-9]+]] +; CHECK-DAG: ![[TFLT1]] = metadata !{metadata !"float" + +; CHECK-DAG: ![[TINT]] = metadata !{metadata ![[TINT1:[0-9]+]] +; CHECK-DAG: ![[TINT1]] = metadata !{metadata !"int" + +attributes #0 = { nounwind uwtable } + +!0 = metadata !{metadata !1, metadata !1, i64 0} +!1 = metadata !{metadata !"float", metadata !2, i64 0} +!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0} +!3 = metadata !{metadata !"Simple C/C++ TBAA"} +!4 = metadata !{metadata !5, metadata !5, i64 0} +!5 = metadata !{metadata !"int", metadata !2, i64 0} + diff --git a/test/Transforms/LoopVectorize/minmax_reduction.ll b/test/Transforms/LoopVectorize/minmax_reduction.ll index 0e47260984..e73e69db57 100644 --- a/test/Transforms/LoopVectorize/minmax_reduction.ll +++ b/test/Transforms/LoopVectorize/minmax_reduction.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll index 88a29c50df..cd022adc93 100644 --- a/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll +++ b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt -indvars -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s +; RUN: opt -indvars -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S < %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/test/Transforms/LoopVectorize/multiple-address-spaces.ll b/test/Transforms/LoopVectorize/multiple-address-spaces.ll index 7d836dedbd..bb2af1e57c 100644 --- a/test/Transforms/LoopVectorize/multiple-address-spaces.ll +++ b/test/Transforms/LoopVectorize/multiple-address-spaces.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s ; From a simple program with two address spaces: ; char Y[4*10000] __attribute__((address_space(1))); diff --git a/test/Transforms/LoopVectorize/no_array_bounds.ll b/test/Transforms/LoopVectorize/no_array_bounds.ll new file mode 100644 index 0000000000..a39b44ff6c --- /dev/null +++ b/test/Transforms/LoopVectorize/no_array_bounds.ll @@ -0,0 +1,101 @@ +; RUN: opt < %s -loop-vectorize -S 2>&1 | FileCheck %s + +; Verify warning is generated when vectorization/ interleaving is explicitly specified and fails to occur. +; CHECK: warning: no_array_bounds.cpp:5:5: loop not vectorized: failed explicitly specified loop vectorization +; CHECK: warning: no_array_bounds.cpp:10:5: loop not interleaved: failed explicitly specified loop interleaving + +; #pragma clang loop vectorize(enable) +; for (int i = 0; i < number; i++) { +; A[B[i]]++; +; } + +; #pragma clang loop vectorize(disable) interleave(enable) +; for (int i = 0; i < number; i++) { +; B[A[i]]++; +; } + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind ssp uwtable +define void @_Z4testPiS_i(i32* nocapture %A, i32* nocapture %B, i32 %number) #0 { +entry: + %cmp25 = icmp sgt i32 %number, 0, !dbg !10 + br i1 %cmp25, label %for.body.preheader, label %for.end15, !dbg !10, !llvm.loop !12 + +for.body.preheader: ; preds = %entry + br label %for.body, !dbg !14 + +for.cond5.preheader: ; preds = %for.body + br i1 %cmp25, label %for.body7.preheader, label %for.end15, !dbg !16, !llvm.loop !18 + +for.body7.preheader: ; preds = %for.cond5.preheader + br label %for.body7, !dbg !20 + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv27 = phi i64 [ %indvars.iv.next28, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv27, !dbg !14 + %0 = load i32* %arrayidx, align 4, !dbg !14, !tbaa !22 + %idxprom1 = sext i32 %0 to i64, !dbg !14 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1, !dbg !14 + %1 = load i32* %arrayidx2, align 4, !dbg !14, !tbaa !22 + %inc = add nsw i32 %1, 1, !dbg !14 + store i32 %inc, i32* %arrayidx2, align 4, !dbg !14, !tbaa !22 + %indvars.iv.next28 = add nuw nsw i64 %indvars.iv27, 1, !dbg !10 + %lftr.wideiv29 = trunc i64 %indvars.iv.next28 to i32, !dbg !10 + %exitcond30 = icmp eq i32 %lftr.wideiv29, %number, !dbg !10 + br i1 %exitcond30, label %for.cond5.preheader, label %for.body, !dbg !10, !llvm.loop !12 + +for.body7: ; preds = %for.body7.preheader, %for.body7 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body7 ], [ 0, %for.body7.preheader ] + %arrayidx9 = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !20 + %2 = load i32* %arrayidx9, align 4, !dbg !20, !tbaa !22 + %idxprom10 = sext i32 %2 to i64, !dbg !20 + %arrayidx11 = getelementptr inbounds i32* %B, i64 %idxprom10, !dbg !20 + %3 = load i32* %arrayidx11, align 4, !dbg !20, !tbaa !22 + %inc12 = add nsw i32 %3, 1, !dbg !20 + store i32 %inc12, i32* %arrayidx11, align 4, !dbg !20, !tbaa !22 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !16 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !16 + %exitcond = icmp eq i32 %lftr.wideiv, %number, !dbg !16 + br i1 %exitcond, label %for.end15.loopexit, label %for.body7, !dbg !16, !llvm.loop !18 + +for.end15.loopexit: ; preds = %for.body7 + br label %for.end15 + +for.end15: ; preds = %for.end15.loopexit, %entry, %for.cond5.preheader + ret void, !dbg !26 +} + +attributes #0 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = metadata !{metadata !"0x11\004\00clang version 3.5.0\001\00\000\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] +!1 = metadata !{metadata !"no_array_bounds.cpp", metadata !"."} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{metadata !"0x2e\00test\00test\00\001\000\001\000\006\00256\001\002", metadata !1, metadata !5, metadata !6, null, void (i32*, i32*, i32)* @_Z4testPiS_i, null, null, metadata !2} ; [ DW_TAG_subprogram ] +!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ] +!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 2} +!9 = metadata !{metadata !"clang version 3.5.0"} +!10 = metadata !{i32 4, i32 8, metadata !11, null} +!11 = metadata !{metadata !"0xb\004\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] +!12 = metadata !{metadata !12, metadata !13} +!13 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} +!14 = metadata !{i32 5, i32 5, metadata !15, null} +!15 = metadata !{metadata !"0xb\004\0036\000", metadata !1, metadata !11} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 9, i32 8, metadata !17, null} +!17 = metadata !{metadata !"0xb\009\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] +!18 = metadata !{metadata !18, metadata !13, metadata !19} +!19 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1} +!20 = metadata !{i32 10, i32 5, metadata !21, null} +!21 = metadata !{metadata !"0xb\009\0036\000", metadata !1, metadata !17} ; [ DW_TAG_lexical_block ] +!22 = metadata !{metadata !23, metadata !23, i64 0} +!23 = metadata !{metadata !"int", metadata !24, i64 0} +!24 = metadata !{metadata !"omnipotent char", metadata !25, i64 0} +!25 = metadata !{metadata !"Simple C/C++ TBAA"} +!26 = metadata !{i32 12, i32 1, metadata !4, null} diff --git a/test/Transforms/LoopVectorize/no_idiv_reduction.ll b/test/Transforms/LoopVectorize/no_idiv_reduction.ll index 295fcabb0b..5c721a6806 100644 --- a/test/Transforms/LoopVectorize/no_idiv_reduction.ll +++ b/test/Transforms/LoopVectorize/no_idiv_reduction.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s +; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S < %s | FileCheck %s @a = common global [128 x i32] zeroinitializer, align 16 ;; Must not vectorize division reduction. Division is lossy. diff --git a/test/Transforms/LoopVectorize/no_int_induction.ll b/test/Transforms/LoopVectorize/no_int_induction.ll index e572d1a884..1275915f5e 100644 --- a/test/Transforms/LoopVectorize/no_int_induction.ll +++ b/test/Transforms/LoopVectorize/no_int_induction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s ; int __attribute__((noinline)) sum_array(int *A, int n) { ; return std::accumulate(A, A + n, 0); diff --git a/test/Transforms/LoopVectorize/no_outside_user.ll b/test/Transforms/LoopVectorize/no_outside_user.ll index 1f891ad2c4..bcd29c1a43 100644 --- a/test/Transforms/LoopVectorize/no_outside_user.ll +++ b/test/Transforms/LoopVectorize/no_outside_user.ll @@ -1,4 +1,7 @@ -; RUN: opt -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -pass-remarks-analysis=loop-vectorize < %s 2>&1 | FileCheck %s + +; CHECK: remark: {{.*}}: loop not vectorized: value could not be identified as an induction or reduction variable +; CHECK: remark: {{.*}}: loop not vectorized: use of induction value outside of the loop is not handled by vectorizer target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" diff --git a/test/Transforms/LoopVectorize/no_switch.ll b/test/Transforms/LoopVectorize/no_switch.ll index 52b42850f4..c989c6bc6b 100644 --- a/test/Transforms/LoopVectorize/no_switch.ll +++ b/test/Transforms/LoopVectorize/no_switch.ll @@ -1,7 +1,8 @@ ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s ; CHECK: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement -; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization is explicitly enabled with width 4 +; CHECK: remark: source.cpp:4:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info (Force=true, Vector Width=4) +; CHECK: warning: source.cpp:4:5: loop not vectorized: failed explicitly specified loop vectorization ; CHECK: _Z11test_switchPii ; CHECK-NOT: x i32> @@ -58,28 +59,28 @@ attributes #0 = { nounwind } !llvm.module.flags = !{!7, !8} !llvm.ident = !{!9} -!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} +!0 = metadata !{metadata !"0x11\004\00clang version 3.5.0\001\00\006\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [./source.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"source.cpp", metadata !"."} !2 = metadata !{} !3 = metadata !{metadata !4} -!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_switch", metadata !"test_switch", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z11test_switchPii, null, null, metadata !2, i32 1} -!5 = metadata !{i32 786473, metadata !1} -!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} +!4 = metadata !{metadata !"0x2e\00test_switch\00test_switch\00\001\000\001\000\006\00256\001\001", metadata !1, metadata !5, metadata !6, null, void (i32*, i32)* @_Z11test_switchPii, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [def] [test_switch] +!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ] [./source.cpp] +!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} -!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 2} !9 = metadata !{metadata !"clang version 3.5.0"} !10 = metadata !{i32 3, i32 8, metadata !11, null} -!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0} +!11 = metadata !{metadata !"0xb\003\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] !12 = metadata !{metadata !12, metadata !13, metadata !13} !13 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} !14 = metadata !{i32 4, i32 5, metadata !15, null} -!15 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 36, i32 0, i32 1} +!15 = metadata !{metadata !"0xb\003\0036\000", metadata !1, metadata !11} ; [ DW_TAG_lexical_block ] !16 = metadata !{metadata !17, metadata !17, i64 0} !17 = metadata !{metadata !"int", metadata !18, i64 0} !18 = metadata !{metadata !"omnipotent char", metadata !19, i64 0} !19 = metadata !{metadata !"Simple C/C++ TBAA"} !20 = metadata !{i32 6, i32 7, metadata !21, null} -!21 = metadata !{i32 786443, metadata !1, metadata !15, i32 4, i32 18, i32 0, i32 2} +!21 = metadata !{metadata !"0xb\004\0018\000", metadata !1, metadata !15} ; [ DW_TAG_lexical_block ] !22 = metadata !{i32 7, i32 5, metadata !21, null} !23 = metadata !{i32 9, i32 7, metadata !21, null} !24 = metadata !{i32 14, i32 1, metadata !4, null} diff --git a/test/Transforms/LoopVectorize/nofloat.ll b/test/Transforms/LoopVectorize/nofloat.ll index c3c81b6f84..e9f4c5f23e 100644 --- a/test/Transforms/LoopVectorize/nofloat.ll +++ b/test/Transforms/LoopVectorize/nofloat.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s ; Make sure that we don't vectorize functions with 'noimplicitfloat' attributes. diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll index 0c54a2b012..b03d4f05e1 100644 --- a/test/Transforms/LoopVectorize/non-const-n.ll +++ b/test/Transforms/LoopVectorize/non-const-n.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/nsw-crash.ll b/test/Transforms/LoopVectorize/nsw-crash.ll index e5fad14d0d..68d993340b 100644 --- a/test/Transforms/LoopVectorize/nsw-crash.ll +++ b/test/Transforms/LoopVectorize/nsw-crash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/opt.ll b/test/Transforms/LoopVectorize/opt.ll index 27030a2ff2..a9be80fbfe 100644 --- a/test/Transforms/LoopVectorize/opt.ll +++ b/test/Transforms/LoopVectorize/opt.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -O3 -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck --check-prefix=LOOPVEC %s -; RUN: opt -S -O3 -disable-loop-vectorization -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck --check-prefix=NOLOOPVEC %s +; RUN: opt -S -O3 -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck --check-prefix=LOOPVEC %s +; RUN: opt -S -O3 -disable-loop-vectorization -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck --check-prefix=NOLOOPVEC %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/ptr_loops.ll b/test/Transforms/LoopVectorize/ptr_loops.ll index 15983f0685..3fb38fe1ef 100644 --- a/test/Transforms/LoopVectorize/ptr_loops.ll +++ b/test/Transforms/LoopVectorize/ptr_loops.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s +; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/read-only.ll b/test/Transforms/LoopVectorize/read-only.ll index fc8f0a5482..2f7a96a09d 100644 --- a/test/Transforms/LoopVectorize/read-only.ll +++ b/test/Transforms/LoopVectorize/read-only.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll index 791fce1562..5e6b7fa332 100644 --- a/test/Transforms/LoopVectorize/reduction.ll +++ b/test/Transforms/LoopVectorize/reduction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/reverse_induction.ll b/test/Transforms/LoopVectorize/reverse_induction.ll index 65ef95dcb1..da02d01706 100644 --- a/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/test/Transforms/LoopVectorize/reverse_induction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=2 -force-vector-width=4 -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/reverse_iter.ll b/test/Transforms/LoopVectorize/reverse_iter.ll index f803120c4d..13172bb17a 100644 --- a/test/Transforms/LoopVectorize/reverse_iter.ll +++ b/test/Transforms/LoopVectorize/reverse_iter.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/runtime-check-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-address-space.ll index 6c86561a1c..34bbe5213f 100644 --- a/test/Transforms/LoopVectorize/runtime-check-address-space.ll +++ b/test/Transforms/LoopVectorize/runtime-check-address-space.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s +; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s ; Check vectorization that would ordinarily require a runtime bounds ; check on the pointers when mixing address spaces. For now we cannot diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll index 212b37ccea..56f1f9981d 100644 --- a/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll +++ b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s +; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s ; Artificial datalayout target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64" diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll index 01e28bc446..9d02a6accd 100644 --- a/test/Transforms/LoopVectorize/runtime-check-readonly.ll +++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK: br ;CHECK: br ;CHECK: getelementptr -;CHECK-NEXT: getelementptr +;CHECK-DAG: getelementptr ;CHECK-DAG: icmp uge ;CHECK-DAG: icmp uge ;CHECK-DAG: icmp uge diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll index d15479d202..1edafb4ed7 100644 --- a/test/Transforms/LoopVectorize/runtime-check.ll +++ b/test/Transforms/LoopVectorize/runtime-check.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/test/Transforms/LoopVectorize/runtime-limit.ll b/test/Transforms/LoopVectorize/runtime-limit.ll index 7370a6fb93..324949dafd 100644 --- a/test/Transforms/LoopVectorize/runtime-limit.ll +++ b/test/Transforms/LoopVectorize/runtime-limit.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/safegep.ll b/test/Transforms/LoopVectorize/safegep.ll index c9508601e2..f853afd2bb 100644 --- a/test/Transforms/LoopVectorize/safegep.ll +++ b/test/Transforms/LoopVectorize/safegep.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s target datalayout = "e-p:32:32:32-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f16:16:16-f32:32:32-f64:32:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" diff --git a/test/Transforms/LoopVectorize/same-base-access.ll b/test/Transforms/LoopVectorize/same-base-access.ll index d623a34690..d19458f4aa 100644 --- a/test/Transforms/LoopVectorize/same-base-access.ll +++ b/test/Transforms/LoopVectorize/same-base-access.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/test/Transforms/LoopVectorize/scalar-select.ll b/test/Transforms/LoopVectorize/scalar-select.ll index 257c7bebe4..6b37cc23ae 100644 --- a/test/Transforms/LoopVectorize/scalar-select.ll +++ b/test/Transforms/LoopVectorize/scalar-select.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/scev-exitlim-crash.ll b/test/Transforms/LoopVectorize/scev-exitlim-crash.ll index 683621a6f6..1bce3f8c5f 100644 --- a/test/Transforms/LoopVectorize/scev-exitlim-crash.ll +++ b/test/Transforms/LoopVectorize/scev-exitlim-crash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=2 -force-vector-width=8 -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=8 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx" diff --git a/test/Transforms/LoopVectorize/simple-unroll.ll b/test/Transforms/LoopVectorize/simple-unroll.ll index 83f35ffb60..8bf680ac7c 100644 --- a/test/Transforms/LoopVectorize/simple-unroll.ll +++ b/test/Transforms/LoopVectorize/simple-unroll.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/small-loop.ll b/test/Transforms/LoopVectorize/small-loop.ll index 49ce5c5397..1d30102c3f 100644 --- a/test/Transforms/LoopVectorize/small-loop.ll +++ b/test/Transforms/LoopVectorize/small-loop.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/start-non-zero.ll b/test/Transforms/LoopVectorize/start-non-zero.ll index 8f675afd80..cc474945a9 100644 --- a/test/Transforms/LoopVectorize/start-non-zero.ll +++ b/test/Transforms/LoopVectorize/start-non-zero.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/store-shuffle-bug.ll b/test/Transforms/LoopVectorize/store-shuffle-bug.ll index e53c1206e4..6d3d11349b 100644 --- a/test/Transforms/LoopVectorize/store-shuffle-bug.ll +++ b/test/Transforms/LoopVectorize/store-shuffle-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s +; RUN: opt -S -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/struct_access.ll b/test/Transforms/LoopVectorize/struct_access.ll index 75beae82f1..cf6f325ae3 100644 --- a/test/Transforms/LoopVectorize/struct_access.ll +++ b/test/Transforms/LoopVectorize/struct_access.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/test/Transforms/LoopVectorize/tbaa-nodep.ll b/test/Transforms/LoopVectorize/tbaa-nodep.ll new file mode 100644 index 0000000000..5cd104cfc5 --- /dev/null +++ b/test/Transforms/LoopVectorize/tbaa-nodep.ll @@ -0,0 +1,102 @@ +; RUN: opt < %s -tbaa -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s --check-prefix=CHECK-NOTBAA +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define i32 @test1(i32* nocapture %a, float* nocapture readonly %b) #0 { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !tbaa !0 + %conv = fptosi float %0 to i32 + %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv + store i32 %conv, i32* %arrayidx2, align 4, !tbaa !4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1600 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 0 + +; TBAA partitions the accesses in this loop, so it can be vectorized without +; runtime checks. + +; CHECK-LABEL: @test1 +; CHECK: entry: +; CHECK-NEXT: br label %vector.body +; CHECK: vector.body: + +; CHECK: load <4 x float>* %{{.*}}, align 4, !tbaa +; CHECK: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa + +; CHECK: ret i32 0 + +; CHECK-NOTBAA-LABEL: @test1 +; CHECK-NOTBAA: icmp uge i32* + +; CHECK-NOTBAA: load <4 x float>* %{{.*}}, align 4, !tbaa +; CHECK-NOTBAA: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa + +; CHECK-NOTBAA: ret i32 0 +} + +; Function Attrs: nounwind uwtable +define i32 @test2(i32* nocapture readonly %a, float* nocapture readonly %b, float* nocapture %c) #0 { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv + %1 = load i32* %arrayidx2, align 4, !tbaa !4 + %conv = sitofp i32 %1 to float + %mul = fmul float %0, %conv + %arrayidx4 = getelementptr inbounds float* %c, i64 %indvars.iv + store float %mul, float* %arrayidx4, align 4, !tbaa !0 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1600 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 0 + +; This test is like the first, except here there is still one runtime check +; required. Without TBAA, however, two checks are required. + +; CHECK-LABEL: @test2 +; CHECK: icmp uge float* +; CHECK: icmp uge float* +; CHECK-NOT: icmp uge i32* + +; CHECK: load <4 x float>* %{{.*}}, align 4, !tbaa +; CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa + +; CHECK: ret i32 0 + +; CHECK-NOTBAA-LABEL: @test2 +; CHECK-NOTBAA: icmp uge float* +; CHECK-NOTBAA: icmp uge float* +; CHECK-NOTBAA-DAG: icmp uge float* +; CHECK-NOTBAA-DAG: icmp uge i32* + +; CHECK-NOTBAA: load <4 x float>* %{{.*}}, align 4, !tbaa +; CHECK-NOTBAA: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa + +; CHECK-NOTBAA: ret i32 0 +} + +attributes #0 = { nounwind uwtable } + +!0 = metadata !{metadata !1, metadata !1, i64 0} +!1 = metadata !{metadata !"float", metadata !2, i64 0} +!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0} +!3 = metadata !{metadata !"Simple C/C++ TBAA"} +!4 = metadata !{metadata !5, metadata !5, i64 0} +!5 = metadata !{metadata !"int", metadata !2, i64 0} + diff --git a/test/Transforms/LoopVectorize/undef-inst-bug.ll b/test/Transforms/LoopVectorize/undef-inst-bug.ll index ed60e801af..0444fe894e 100644 --- a/test/Transforms/LoopVectorize/undef-inst-bug.ll +++ b/test/Transforms/LoopVectorize/undef-inst-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/unroll_novec.ll b/test/Transforms/LoopVectorize/unroll_novec.ll index 89f4678526..257b4e63a9 100644 --- a/test/Transforms/LoopVectorize/unroll_novec.ll +++ b/test/Transforms/LoopVectorize/unroll_novec.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-target-num-scalar-regs=16 -force-target-max-scalar-unroll=8 -force-target-instruction-cost=1 -small-loop-cost=40 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-target-num-scalar-regs=16 -force-target-max-scalar-interleave=8 -force-target-instruction-cost=1 -small-loop-cost=40 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/unsized-pointee-crash.ll b/test/Transforms/LoopVectorize/unsized-pointee-crash.ll new file mode 100644 index 0000000000..5cc98378b9 --- /dev/null +++ b/test/Transforms/LoopVectorize/unsized-pointee-crash.ll @@ -0,0 +1,24 @@ +; RUN: opt -S -loop-vectorize < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: @fn1 +define void @fn1() { +entry: + br label %for.body + +for.body: + %b.05 = phi i32 (...)* [ undef, %entry ], [ %1, %for.body ] + %a.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %0 = bitcast i32 (...)* %b.05 to i8* + %add.ptr = getelementptr i8* %0, i64 1 + %1 = bitcast i8* %add.ptr to i32 (...)* +; CHECK: %[[cst:.*]] = bitcast i32 (...)* {{.*}} to i8* +; CHECK-NEXT: %[[gep:.*]] = getelementptr i8* %[[cst]], i64 1 + %inc = add nsw i32 %a.04, 1 + %exitcond = icmp eq i32 %a.04, 63 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/Transforms/LoopVectorize/value-ptr-bug.ll b/test/Transforms/LoopVectorize/value-ptr-bug.ll index 6b06afaf0d..7fb9095b7c 100644 --- a/test/Transforms/LoopVectorize/value-ptr-bug.ll +++ b/test/Transforms/LoopVectorize/value-ptr-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/vect.omp.persistence.ll b/test/Transforms/LoopVectorize/vect.omp.persistence.ll index f646567783..b0fe7a5e1d 100644 --- a/test/Transforms/LoopVectorize/vect.omp.persistence.ll +++ b/test/Transforms/LoopVectorize/vect.omp.persistence.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -O2 -force-vector-unroll=2 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s +; RUN: opt < %s -O2 -force-vector-interleave=2 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s ; REQUIRES: asserts ; Loop from "rotated" diff --git a/test/Transforms/LoopVectorize/vect.stats.ll b/test/Transforms/LoopVectorize/vect.stats.ll index 92ec24f726..556da455b4 100644 --- a/test/Transforms/LoopVectorize/vect.stats.ll +++ b/test/Transforms/LoopVectorize/vect.stats.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=4 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s ; REQUIRES: asserts ; @@ -62,4 +62,4 @@ for.body: for.end: ret void -}
\ No newline at end of file +} diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll index 47de13dbfe..cee4b16951 100644 --- a/test/Transforms/LoopVectorize/vectorize-once.ll +++ b/test/Transforms/LoopVectorize/vectorize-once.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -70,7 +70,7 @@ attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" " ; CHECK: !0 = metadata !{metadata !0, metadata !1, metadata !2} ; CHECK: !1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1} -; CHECK: !2 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1} +; CHECK: !2 = metadata !{metadata !"llvm.loop.interleave.count", i32 1} ; CHECK: !3 = metadata !{metadata !3, metadata !1, metadata !2} !0 = metadata !{metadata !0, metadata !1} diff --git a/test/Transforms/LoopVectorize/version-mem-access.ll b/test/Transforms/LoopVectorize/version-mem-access.ll index 51d20e227d..7ac2fca253 100644 --- a/test/Transforms/LoopVectorize/version-mem-access.ll +++ b/test/Transforms/LoopVectorize/version-mem-access.ll @@ -1,4 +1,4 @@ -; RUN: opt -basicaa -loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-unroll=1 < %s -S | FileCheck %s +; RUN: opt -basicaa -loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-interleave=1 < %s -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/write-only.ll b/test/Transforms/LoopVectorize/write-only.ll index 71a9cd0dc5..2f100de823 100644 --- a/test/Transforms/LoopVectorize/write-only.ll +++ b/test/Transforms/LoopVectorize/write-only.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" |