diff options
Diffstat (limited to 'test/CodeGen')
130 files changed, 5007 insertions, 530 deletions
diff --git a/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll b/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll new file mode 100644 index 0000000000..f7adf73263 --- /dev/null +++ b/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -mtriple=arm-unknown-linux-gnueabi + +define void @"java.lang.String::getChars"([84 x i8]* %method, i32 %base_pc, [788 x i8]* %thread) { + %1 = load i32* undef ; <i32> [#uses=1] + %2 = sub i32 %1, 48 ; <i32> [#uses=1] + br i1 undef, label %stack_overflow, label %no_overflow + +stack_overflow: ; preds = %0 + unreachable + +no_overflow: ; preds = %0 + %frame = inttoptr i32 %2 to [17 x i32]* ; <[17 x i32]*> [#uses=4] + %3 = load i32* undef ; <i32> [#uses=1] + %4 = load i32* null ; <i32> [#uses=1] + %5 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 13 ; <i32*> [#uses=1] + %6 = bitcast i32* %5 to [8 x i8]** ; <[8 x i8]**> [#uses=1] + %7 = load [8 x i8]** %6 ; <[8 x i8]*> [#uses=1] + %8 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 12 ; <i32*> [#uses=1] + %9 = load i32* %8 ; <i32> [#uses=1] + br i1 undef, label %bci_13, label %bci_4 + +bci_13: ; preds = %no_overflow + br i1 undef, label %bci_30, label %bci_21 + +bci_30: ; preds = %bci_13 + br i1 undef, label %bci_46, label %bci_35 + +bci_46: ; preds = %bci_30 + %10 = sub i32 %4, %3 ; <i32> [#uses=1] + %11 = load [8 x i8]** null ; <[8 x i8]*> [#uses=1] + %callee = bitcast [8 x i8]* %11 to [84 x i8]* ; <[84 x i8]*> [#uses=1] + %12 = bitcast i8* undef to i32* ; <i32*> [#uses=1] + %base_pc7 = load i32* %12 ; <i32> [#uses=2] + %13 = add i32 %base_pc7, 0 ; <i32> [#uses=1] + %14 = inttoptr i32 %13 to void ([84 x i8]*, i32, [788 x i8]*)** ; <void ([84 x i8]*, i32, [788 x i8]*)**> [#uses=1] + %entry_point = load void ([84 x i8]*, i32, [788 x i8]*)** %14 ; <void ([84 x i8]*, i32, [788 x i8]*)*> [#uses=1] + %15 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 1 ; <i32*> [#uses=1] + %16 = ptrtoint i32* %15 to i32 ; <i32> [#uses=1] + %stack_pointer_addr9 = 
bitcast i8* undef to i32* ; <i32*> [#uses=1] + store i32 %16, i32* %stack_pointer_addr9 + %17 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 2 ; <i32*> [#uses=1] + store i32 %9, i32* %17 + store i32 %10, i32* undef + store [84 x i8]* %method, [84 x i8]** undef + %18 = add i32 %base_pc, 20 ; <i32> [#uses=1] + store i32 %18, i32* undef + store [8 x i8]* %7, [8 x i8]** undef + call void %entry_point([84 x i8]* %callee, i32 %base_pc7, [788 x i8]* %thread) + br i1 undef, label %no_exception, label %exception + +exception: ; preds = %bci_46 + ret void + +no_exception: ; preds = %bci_46 + ret void + +bci_35: ; preds = %bci_30 + ret void + +bci_21: ; preds = %bci_13 + ret void + +bci_4: ; preds = %no_overflow + ret void +} diff --git a/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll b/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll new file mode 100644 index 0000000000..b0b4cb37d1 --- /dev/null +++ b/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll @@ -0,0 +1,54 @@ +; RUN: llc < %s -march=arm + +define void @"java.lang.String::getChars"([84 x i8]* %method, i32 %base_pc, [788 x i8]* %thread) { + %1 = sub i32 undef, 48 ; <i32> [#uses=1] + br i1 undef, label %stack_overflow, label %no_overflow + +stack_overflow: ; preds = %0 + unreachable + +no_overflow: ; preds = %0 + %frame = inttoptr i32 %1 to [17 x i32]* ; <[17 x i32]*> [#uses=4] + %2 = load i32* null ; <i32> [#uses=2] + %3 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 14 ; <i32*> [#uses=1] + %4 = load i32* %3 ; <i32> [#uses=2] + %5 = load [8 x i8]** undef ; <[8 x i8]*> [#uses=2] + br i1 undef, label %bci_13, label %bci_4 + +bci_13: ; preds = %no_overflow + br i1 undef, label %bci_30, label %bci_21 + +bci_30: ; preds = %bci_13 + %6 = icmp sle i32 %2, %4 ; <i1> [#uses=1] + br i1 %6, label %bci_46, label %bci_35 + +bci_46: ; preds = %bci_30 + store [84 x i8]* %method, [84 x i8]** undef + br i1 false, label %no_exception, label %exception + +exception: ; preds = %bci_46 + ret void + +no_exception: ; preds = 
%bci_46 + ret void + +bci_35: ; preds = %bci_30 + %7 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 15 ; <i32*> [#uses=1] + store i32 %2, i32* %7 + %8 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 14 ; <i32*> [#uses=1] + store i32 %4, i32* %8 + %9 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 13 ; <i32*> [#uses=1] + %10 = bitcast i32* %9 to [8 x i8]** ; <[8 x i8]**> [#uses=1] + store [8 x i8]* %5, [8 x i8]** %10 + call void inttoptr (i32 13839116 to void ([788 x i8]*, i32)*)([788 x i8]* %thread, i32 7) + ret void + +bci_21: ; preds = %bci_13 + ret void + +bci_4: ; preds = %no_overflow + store [8 x i8]* %5, [8 x i8]** undef + store i32 undef, i32* undef + call void inttoptr (i32 13839116 to void ([788 x i8]*, i32)*)([788 x i8]* %thread, i32 7) + ret void +} diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll index 72ec8efcc4..52ab8717c1 100644 --- a/test/CodeGen/ARM/arm-negative-stride.ll +++ b/test/CodeGen/ARM/arm-negative-stride.ll @@ -1,7 +1,32 @@ ; RUN: llc < %s -march=arm | FileCheck %s +; This loop is rewritten with an indvar which counts down, which +; frees up a register from holding the trip count. + define void @test(i32* %P, i32 %A, i32 %i) nounwind { entry: +; CHECK: str r1, [{{r.*}}, +{{r.*}}, lsl #2] + icmp eq i32 %i, 0 ; <i1>:0 [#uses=1] + br i1 %0, label %return, label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] + %i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1] + %tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1] + store i32 %A, i32* %tmp2 + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1] + br i1 %1, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +; This loop has a non-address use of the count-up indvar, so +; it'll remain. Now the original store uses a negative-stride address. 
+ +define void @test_with_forced_iv(i32* %P, i32 %A, i32 %i) nounwind { +entry: ; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2] icmp eq i32 %i, 0 ; <i1>:0 [#uses=1] br i1 %0, label %return, label %bb @@ -11,6 +36,7 @@ bb: ; preds = %bb, %entry %i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1] %tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1] store i32 %A, i32* %tmp2 + store i32 %indvar, i32* null %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1] br i1 %1, label %return, label %bb diff --git a/test/CodeGen/ARM/armv4.ll b/test/CodeGen/ARM/armv4.ll new file mode 100644 index 0000000000..49b129dabd --- /dev/null +++ b/test/CodeGen/ARM/armv4.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=arm-unknown-eabi | FileCheck %s -check-prefix=THUMB +; RUN: llc < %s -mtriple=arm-unknown-eabi -mcpu=strongarm | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -mtriple=arm-unknown-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=THUMB +; RUN: llc < %s -mtriple=arm-unknown-eabi -mattr=+v6 | FileCheck %s -check-prefix=THUMB +; RUN: llc < %s -mtriple=armv4-unknown-eabi | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -mtriple=armv4t-unknown-eabi | FileCheck %s -check-prefix=THUMB + +define arm_aapcscc i32 @test(i32 %a) nounwind readnone { +entry: +; ARM: mov pc +; THUMB: bx + ret i32 %a +} diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll index 3dd66ae71d..c60b75b574 100644 --- a/test/CodeGen/ARM/call.ll +++ b/test/CodeGen/ARM/call.ll @@ -20,3 +20,17 @@ define void @g.upgrd.1() { %tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0] ret void } + +define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind { +; CHECKV4: m_231b +; CHECKV4: bx r{{.*}} +BB0: + %5 = inttoptr i32 %0 to i32* ; <i32*> [#uses=1] + %t35 = volatile load i32* %5 ; <i32> [#uses=1] + %6 = inttoptr i32 %t35 to i32** ; <i32**> [#uses=1] + %7 = getelementptr i32** %6, i32 86 ; <i32**> [#uses=1] + %8 = load i32** %7 ; <i32*> [#uses=1] + %9 = 
bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; <i32* (i32, i32*, i32, i32*, i32*, i32*)*> [#uses=1] + %10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; <i32*> [#uses=1] + ret i32* %10 +} diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll index 507ec2c7bd..1bbb96deee 100644 --- a/test/CodeGen/ARM/lsr-code-insertion.ll +++ b/test/CodeGen/ARM/lsr-code-insertion.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -stats |& grep {40.*Number of machine instrs printed} -; RUN: llc < %s -stats |& grep {.*Number of re-materialization} +; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed} +; RUN: llc < %s -stats |& not grep {.*Number of re-materialization} ; This test really wants to check that the resultant "cond_true" block only ; has a single store in it, and that cond_true55 only has code to materialize ; the constant and do a store. We do *not* want something like this: diff --git a/test/CodeGen/ARM/neon_minmax.ll b/test/CodeGen/ARM/neon_minmax.ll new file mode 100644 index 0000000000..d301c6a4ca --- /dev/null +++ b/test/CodeGen/ARM/neon_minmax.ll @@ -0,0 +1,81 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s + +define float @fmin_ole(float %x) nounwind { +;CHECK: fmin_ole: +;CHECK: vmin.f32 + %cond = fcmp ole float 1.0, %x + %min1 = select i1 %cond, float 1.0, float %x + ret float %min1 +} + +define float @fmin_ole_zero(float %x) nounwind { +;CHECK: fmin_ole_zero: +;CHECK-NOT: vmin.f32 + %cond = fcmp ole float 0.0, %x + %min1 = select i1 %cond, float 0.0, float %x + ret float %min1 +} + +define float @fmin_ult(float %x) nounwind { +;CHECK: fmin_ult: +;CHECK: vmin.f32 + %cond = fcmp ult float %x, 1.0 + %min1 = select i1 %cond, float %x, float 1.0 + ret float %min1 +} + +define float @fmax_ogt(float %x) nounwind { +;CHECK: fmax_ogt: +;CHECK: vmax.f32 + %cond = fcmp ogt float 1.0, %x + %max1 = select i1 %cond, float 1.0, float %x + ret float %max1 +} + +define float 
@fmax_uge(float %x) nounwind { +;CHECK: fmax_uge: +;CHECK: vmax.f32 + %cond = fcmp uge float %x, 1.0 + %max1 = select i1 %cond, float %x, float 1.0 + ret float %max1 +} + +define float @fmax_uge_zero(float %x) nounwind { +;CHECK: fmax_uge_zero: +;CHECK-NOT: vmax.f32 + %cond = fcmp uge float %x, 0.0 + %max1 = select i1 %cond, float %x, float 0.0 + ret float %max1 +} + +define float @fmax_olt_reverse(float %x) nounwind { +;CHECK: fmax_olt_reverse: +;CHECK: vmax.f32 + %cond = fcmp olt float %x, 1.0 + %max1 = select i1 %cond, float 1.0, float %x + ret float %max1 +} + +define float @fmax_ule_reverse(float %x) nounwind { +;CHECK: fmax_ule_reverse: +;CHECK: vmax.f32 + %cond = fcmp ult float 1.0, %x + %max1 = select i1 %cond, float %x, float 1.0 + ret float %max1 +} + +define float @fmin_oge_reverse(float %x) nounwind { +;CHECK: fmin_oge_reverse: +;CHECK: vmin.f32 + %cond = fcmp oge float %x, 1.0 + %min1 = select i1 %cond, float 1.0, float %x + ret float %min1 +} + +define float @fmin_ugt_reverse(float %x) nounwind { +;CHECK: fmin_ugt_reverse: +;CHECK: vmin.f32 + %cond = fcmp ugt float 1.0, %x + %min1 = select i1 %cond, float %x, float 1.0 + ret float %min1 +} diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll index 367f782e0f..92c1cf1821 100644 --- a/test/CodeGen/ARM/remat.ll +++ b/test/CodeGen/ARM/remat.ll @@ -32,16 +32,16 @@ bb.i35: ; preds = %bb142 br label %phi1.exit phi1.exit: ; preds = %bb.i35, %bb142 - %.pn = phi double [ %6, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=0] + %.pn = phi double [ %6, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=1] %9 = phi double [ %8, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=1] - %10 = fmul double undef, %9 ; <double> [#uses=0] + %10 = fmul double %.pn, %9 ; <double> [#uses=1] br i1 %14, label %phi0.exit, label %bb.i bb.i: ; preds = %phi1.exit unreachable phi0.exit: ; preds = %phi1.exit - %11 = fsub double %4, undef ; <double> [#uses=1] + %11 = fsub double %4, %10 ; <double> [#uses=1] 
%12 = fadd double 0.000000e+00, %11 ; <double> [#uses=1] store double %12, double* undef, align 4 br label %bb142 diff --git a/test/CodeGen/Alpha/add.ll b/test/CodeGen/Alpha/add.ll index 24a74188f8..cd883f64a6 100644 --- a/test/CodeGen/Alpha/add.ll +++ b/test/CodeGen/Alpha/add.ll @@ -4,9 +4,8 @@ ; RUN: grep { addl} %t.s | count 2 ; RUN: grep { addq} %t.s | count 2 ; RUN: grep { subl} %t.s | count 2 -; RUN: grep { subq} %t.s | count 1 +; RUN: grep { subq} %t.s | count 2 ; -; RUN: grep {lda \$0,-100(\$16)} %t.s | count 1 ; RUN: grep {s4addl} %t.s | count 2 ; RUN: grep {s8addl} %t.s | count 2 ; RUN: grep {s4addq} %t.s | count 2 diff --git a/test/CodeGen/Blackfin/promote-logic.ll b/test/CodeGen/Blackfin/promote-logic.ll index 46da56681d..1ac1408290 100644 --- a/test/CodeGen/Blackfin/promote-logic.ll +++ b/test/CodeGen/Blackfin/promote-logic.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=bfin > %t -; XFAIL: * +; RUN: llc < %s -march=bfin ; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR ; operation after LegalizeOps. 
diff --git a/test/CodeGen/CellSPU/bss.ll b/test/CodeGen/CellSPU/bss.ll new file mode 100644 index 0000000000..05a0f50039 --- /dev/null +++ b/test/CodeGen/CellSPU/bss.ll @@ -0,0 +1,5 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep "\.section" %t1.s | grep "\.bss" | count 1 + +@bssVar = global i32 zeroinitializer + diff --git a/test/CodeGen/Generic/2007-05-05-Personality.ll b/test/CodeGen/Generic/2007-05-05-Personality.ll index 27493261d5..c92783e5e4 100644 --- a/test/CodeGen/Generic/2007-05-05-Personality.ll +++ b/test/CodeGen/Generic/2007-05-05-Personality.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i686-pc-linux-gnu -enable-eh -o - | grep zPLR +; RUN: llc < %s -mtriple=i686-pc-linux-gnu -enable-eh -o - | grep zPL @error = external global i8 ; <i8*> [#uses=2] diff --git a/test/CodeGen/Generic/GC/argpromotion.ll b/test/CodeGen/Generic/GC/argpromotion.ll index dda376d616..c63ce222b8 100644 --- a/test/CodeGen/Generic/GC/argpromotion.ll +++ b/test/CodeGen/Generic/GC/argpromotion.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -anders-aa -argpromotion +; RUN: opt < %s -argpromotion declare void @llvm.gcroot(i8**, i8*) diff --git a/test/CodeGen/Generic/debug-info.ll b/test/CodeGen/Generic/debug-info.ll deleted file mode 100644 index 20d9f913c1..0000000000 --- a/test/CodeGen/Generic/debug-info.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: llc < %s - - %lldb.compile_unit = type { i32, i16, i16, i8*, i8*, i8*, { }* } -@d.compile_unit7 = external global %lldb.compile_unit ; <%lldb.compile_unit*> [#uses=1] - -declare void @llvm.dbg.stoppoint(i32, i32, %lldb.compile_unit*) - -define void @rb_raise(i32, ...) 
{ -entry: - br i1 false, label %strlen.exit, label %no_exit.i - -no_exit.i: ; preds = %entry - ret void - -strlen.exit: ; preds = %entry - call void @llvm.dbg.stoppoint( i32 4358, i32 0, %lldb.compile_unit* @d.compile_unit7 ) - unreachable -} - diff --git a/test/CodeGen/MBlaze/brind.ll b/test/CodeGen/MBlaze/brind.ll new file mode 100644 index 0000000000..7798e0f56a --- /dev/null +++ b/test/CodeGen/MBlaze/brind.ll @@ -0,0 +1,73 @@ +; Ensure that the select instruction is supported and is lowered to +; some sort of branch instruction. +; +; RUN: llc < %s -march=mblaze -mattr=+mul,+fpu,+barrel | FileCheck %s + +declare i32 @printf(i8*, ...) +@MSG = internal constant [13 x i8] c"Message: %d\0A\00" + +@BLKS = private constant [5 x i8*] + [ i8* blockaddress(@brind, %L1), + i8* blockaddress(@brind, %L2), + i8* blockaddress(@brind, %L3), + i8* blockaddress(@brind, %L4), + i8* blockaddress(@brind, %L5) ] + +define i32 @brind(i32 %a, i32 %b) +{ + ; CHECK: brind: +entry: + br label %loop + +loop: + %tmp.0 = phi i32 [ 0, %entry ], [ %tmp.8, %finish ] + %dst.0 = getelementptr [5 x i8*]* @BLKS, i32 0, i32 %tmp.0 + %dst.1 = load i8** %dst.0 + indirectbr i8* %dst.1, [ label %L1, + label %L2, + label %L3, + label %L4, + label %L5 ] + ; CHECK: br {{r[0-9]*}} + +L1: + %tmp.1 = add i32 %a, %b + br label %finish + ; CHECK: br + +L2: + %tmp.2 = sub i32 %a, %b + br label %finish + ; CHECK: br + +L3: + %tmp.3 = mul i32 %a, %b + br label %finish + ; CHECK: br + +L4: + %tmp.4 = sdiv i32 %a, %b + br label %finish + ; CHECK: br + +L5: + %tmp.5 = srem i32 %a, %b + br label %finish + ; CHECK: br + +finish: + %tmp.6 = phi i32 [ %tmp.1, %L1 ], + [ %tmp.2, %L2 ], + [ %tmp.3, %L3 ], + [ %tmp.4, %L4 ], + [ %tmp.5, %L5 ] + + call i32 (i8*,...)* @printf( i8* getelementptr([13 x i8]* @MSG,i32 0,i32 0), + i32 %tmp.6) + + %tmp.7 = add i32 %tmp.0, 1 + %tmp.8 = urem i32 %tmp.7, 5 + + br label %loop + ; CHECK: br +} diff --git a/test/CodeGen/MBlaze/callind.ll b/test/CodeGen/MBlaze/callind.ll new file mode 
100644 index 0000000000..bfc8d001fd --- /dev/null +++ b/test/CodeGen/MBlaze/callind.ll @@ -0,0 +1,80 @@ +; Ensure that indirect calls work and that they are lowered to some +; sort of branch and link instruction. +; +; RUN: llc < %s -march=mblaze -mattr=+mul,+fpu,+barrel | FileCheck %s + +declare i32 @printf(i8*, ...) +@MSG = internal constant [13 x i8] c"Message: %d\0A\00" + +@FUNS = private constant [5 x i32 (i32,i32)*] + [ i32 (i32,i32)* @doadd, + i32 (i32,i32)* @dosub, + i32 (i32,i32)* @domul, + i32 (i32,i32)* @dodiv, + i32 (i32,i32)* @dorem ] + +define i32 @doadd(i32 %a, i32 %b) +{ + ; CHECK: doadd: + %tmp.0 = add i32 %a, %b + ret i32 %tmp.0 + ; CHECK: rtsd +} + +define i32 @dosub(i32 %a, i32 %b) +{ + ; CHECK: dosub: + %tmp.0 = sub i32 %a, %b + ret i32 %tmp.0 + ; CHECK: rtsd +} + +define i32 @domul(i32 %a, i32 %b) +{ + ; CHECK: domul: + %tmp.0 = mul i32 %a, %b + ret i32 %tmp.0 + ; CHECK: rtsd +} + +define i32 @dodiv(i32 %a, i32 %b) +{ + ; CHECK: dodiv: + %tmp.0 = sdiv i32 %a, %b + ret i32 %tmp.0 + ; CHECK: rtsd +} + +define i32 @dorem(i32 %a, i32 %b) +{ + ; CHECK: dorem: + %tmp.0 = srem i32 %a, %b + ret i32 %tmp.0 + ; CHECK: rtsd +} + +define i32 @callind(i32 %a, i32 %b) +{ + ; CHECK: callind: +entry: + br label %loop + +loop: + %tmp.0 = phi i32 [ 0, %entry ], [ %tmp.3, %loop ] + %dst.0 = getelementptr [5 x i32 (i32,i32)*]* @FUNS, i32 0, i32 %tmp.0 + %dst.1 = load i32 (i32,i32)** %dst.0 + %tmp.1 = call i32 %dst.1(i32 %a, i32 %b) + ; CHECK-NOT: brli + ; CHECK-NOT: brlai + ; CHECK: brl + + call i32 (i8*,...)* @printf( i8* getelementptr([13 x i8]* @MSG,i32 0,i32 0), + i32 %tmp.1) + ; CHECK: brl + + %tmp.2 = add i32 %tmp.0, 1 + %tmp.3 = urem i32 %tmp.2, 5 + + br label %loop + ; CHECK: br +} diff --git a/test/CodeGen/MBlaze/cc.ll b/test/CodeGen/MBlaze/cc.ll new file mode 100644 index 0000000000..aaa918ffc3 --- /dev/null +++ b/test/CodeGen/MBlaze/cc.ll @@ -0,0 +1,315 @@ +; Test some of the calling convention lowering done by the MBlaze backend. 
+; We test that integer values are passed in the correct registers and +; returned in the correct registers. Additionally, we test that the stack +; is used as appropriate for passing arguments that cannot be placed into +; registers. +; +; RUN: llc < %s -march=mblaze | FileCheck %s + +declare i32 @printf(i8*, ...) +@MSG = internal constant [13 x i8] c"Message: %d\0A\00" + +define void @params0_noret() { + ; CHECK: params0_noret: + ret void + ; CHECK-NOT: {{.* r3, r0, 1}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i8 @params0_8bitret() { + ; CHECK: params0_8bitret: + ret i8 1 + ; CHECK: {{.* r3, r0, 1}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i16 @params0_16bitret() { + ; CHECK: params0_16bitret: + ret i16 1 + ; CHECK: {{.* r3, r0, 1}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params0_32bitret() { + ; CHECK: params0_32bitret: + ret i32 1 + ; CHECK: {{.* r3, r0, 1}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i64 @params0_64bitret() { + ; CHECK: params0_64bitret: + ret i64 1 + ; CHECK: {{.* r3, r0, .*}} + ; CHECK: {{.* r4, r0, 1}} + ; CHECK: rtsd +} + +define i32 @params1_32bitret(i32 %a) { + ; CHECK: params1_32bitret: + ret i32 %a + ; CHECK: {{.* r3, r5, r0}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params2_32bitret(i32 %a, i32 %b) { + ; CHECK: params2_32bitret: + ret i32 %b + ; CHECK: {{.* r3, r6, r0}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params3_32bitret(i32 %a, i32 %b, i32 %c) { + ; CHECK: params3_32bitret: + ret i32 %c + ; CHECK: {{.* r3, r7, r0}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params4_32bitret(i32 %a, i32 %b, i32 %c, i32 %d) { + ; CHECK: params4_32bitret: + ret i32 %d + ; CHECK: {{.* r3, r8, r0}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params5_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { + ; CHECK: params5_32bitret: + ret i32 %e + ; CHECK: {{.* r3, r9, 
r0}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params6_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) { + ; CHECK: params6_32bitret: + ret i32 %f + ; CHECK: {{.* r3, r10, r0}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params7_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, + i32 %g) { + ; CHECK: params7_32bitret: + ret i32 %g + ; CHECK: {{lwi? r3, r1, 32}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params8_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, + i32 %g, i32 %h) { + ; CHECK: params8_32bitret: + ret i32 %h + ; CHECK: {{lwi? r3, r1, 36}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params9_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, + i32 %g, i32 %h, i32 %i) { + ; CHECK: params9_32bitret: + ret i32 %i + ; CHECK: {{lwi? r3, r1, 40}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params10_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, + i32 %g, i32 %h, i32 %i, i32 %j) { + ; CHECK: params10_32bitret: + ret i32 %j + ; CHECK: {{lwi? 
r3, r1, 44}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define void @testing() { + %MSG.1 = getelementptr [13 x i8]* @MSG, i32 0, i32 0 + + call void @params0_noret() + ; CHECK: brlid + + %tmp.1 = call i8 @params0_8bitret() + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i8 %tmp.1) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.2 = call i16 @params0_16bitret() + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i16 %tmp.2) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.3 = call i32 @params0_32bitret() + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.3) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.4 = call i64 @params0_64bitret() + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i64 %tmp.4) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK: {{.* r7, r4, r0}} + ; CHECK: brlid + + %tmp.5 = call i32 @params1_32bitret(i32 1) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.5) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.6 = call i32 @params2_32bitret(i32 1, i32 2) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.6) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.7 = call i32 @params3_32bitret(i32 1, i32 2, i32 3) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.7) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.8 = call i32 @params4_32bitret(i32 1, i32 2, 
i32 3, i32 4) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.8) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.9 = call i32 @params5_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.9) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.10 = call i32 @params6_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: {{.* r10, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.10) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.11 = call i32 @params7_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7) + ; CHECK: {{swi? .*, r1, 28}} + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: {{.* r10, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.11) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.12 = call i32 @params8_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7, i32 8) + ; CHECK: {{swi? .*, r1, 28}} + ; CHECK: {{swi? 
.*, r1, 32}} + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: {{.* r10, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.12) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.13 = call i32 @params9_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7, i32 8, i32 9) + ; CHECK: {{swi? .*, r1, 28}} + ; CHECK: {{swi? .*, r1, 32}} + ; CHECK: {{swi? .*, r1, 36}} + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: {{.* r10, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.13) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + %tmp.14 = call i32 @params10_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7, i32 8, i32 9, i32 10) + ; CHECK: {{swi? .*, r1, 28}} + ; CHECK: {{swi? .*, r1, 32}} + ; CHECK: {{swi? .*, r1, 36}} + ; CHECK: {{swi? 
.*, r1, 40}} + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: {{.* r10, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.14) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, r3, r0}} + ; CHECK-NOT: {{.* r7, .*, .*}} + ; CHECK: brlid + + ret void +} diff --git a/test/CodeGen/MBlaze/dg.exp b/test/CodeGen/MBlaze/dg.exp new file mode 100644 index 0000000000..bfd5e47157 --- /dev/null +++ b/test/CodeGen/MBlaze/dg.exp @@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target MBlaze] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +} diff --git a/test/CodeGen/MBlaze/div.ll b/test/CodeGen/MBlaze/div.ll new file mode 100644 index 0000000000..fae9830619 --- /dev/null +++ b/test/CodeGen/MBlaze/div.ll @@ -0,0 +1,75 @@ +; Ensure that multiplication is lowered to function calls when the multiplier +; unit is not available in the hardware and that function calls are not used +; when the multiplier unit is available in the hardware. 
+; +; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s +; RUN: llc < %s -march=mblaze -mattr=+div | FileCheck -check-prefix=DIV %s + +define i8 @test_i8(i8 %a, i8 %b) { + ; FUN: test_i8: + ; DIV: test_i8: + + %tmp.1 = udiv i8 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV: idivu + + %tmp.2 = sdiv i8 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV-NOT: idivu + ; DIV: idiv + + %tmp.3 = add i8 %tmp.1, %tmp.2 + ret i8 %tmp.3 + ; FUN: rtsd + ; DIV: rtsd +} + +define i16 @test_i16(i16 %a, i16 %b) { + ; FUN: test_i16: + ; DIV: test_i16: + + %tmp.1 = udiv i16 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV: idivu + + %tmp.2 = sdiv i16 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV-NOT: idivu + ; DIV: idiv + + %tmp.3 = add i16 %tmp.1, %tmp.2 + ret i16 %tmp.3 + ; FUN: rtsd + ; DIV: rtsd +} + +define i32 @test_i32(i32 %a, i32 %b) { + ; FUN: test_i32: + ; DIV: test_i32: + + %tmp.1 = udiv i32 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV: idivu + + %tmp.2 = sdiv i32 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV-NOT: idivu + ; DIV: idiv + + %tmp.3 = add i32 %tmp.1, %tmp.2 + ret i32 %tmp.3 + ; FUN: rtsd + ; DIV: rtsd +} diff --git a/test/CodeGen/MBlaze/fpu.ll b/test/CodeGen/MBlaze/fpu.ll new file mode 100644 index 0000000000..83f4d83124 --- /dev/null +++ b/test/CodeGen/MBlaze/fpu.ll @@ -0,0 +1,66 @@ +; Ensure that floating point operations are lowered to function calls when the +; FPU is not available in the hardware and that function calls are not used +; when the FPU is available in the hardware. 
+; +; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s +; RUN: llc < %s -march=mblaze -mattr=+fpu | FileCheck -check-prefix=FPU %s + +define float @test_add(float %a, float %b) { + ; FUN: test_add: + ; FPU: test_add: + + %tmp.1 = fadd float %a, %b + ; FUN-NOT: fadd + ; FUN: brlid + ; FPU-NOT: brlid + ; FPU: fadd + + ret float %tmp.1 + ; FUN: rtsd + ; FPU: rtsd +} + +define float @test_sub(float %a, float %b) { + ; FUN: test_sub: + ; FPU: test_sub: + + %tmp.1 = fsub float %a, %b + ; FUN-NOT: frsub + ; FUN: brlid + ; FPU-NOT: brlid + ; FPU: frsub + + ret float %tmp.1 + ; FUN: rtsd + ; FPU: rtsd +} + +define float @test_mul(float %a, float %b) { + ; FUN: test_mul: + ; FPU: test_mul: + + %tmp.1 = fmul float %a, %b + ; FUN-NOT: fmul + ; FUN: brlid + ; FPU-NOT: brlid + ; FPU: fmul + + ret float %tmp.1 + ; FUN: rtsd + ; FPU: rtsd +} + +define float @test_div(float %a, float %b) { + ; FUN: test_div: + ; FPU: test_div: + + %tmp.1 = fdiv float %a, %b + ; FUN-NOT: fdiv + ; FUN: brlid + ; FPU-NOT: brlid + ; FPU: fdiv + + ret float %tmp.1 + ; FUN: rtsd + ; FPU: rtsd +} diff --git a/test/CodeGen/MBlaze/fsl.ll b/test/CodeGen/MBlaze/fsl.ll new file mode 100644 index 0000000000..f9c6205bc1 --- /dev/null +++ b/test/CodeGen/MBlaze/fsl.ll @@ -0,0 +1,323 @@ +; Ensure that the FSL instrinsic instruction generate single FSL instructions +; at the machine level. Additionally, ensure that dynamic values use the +; dynamic version of the instructions and that constant values use the +; constant version of the instructions. 
+; +; RUN: llc < %s -march=mblaze | FileCheck %s + +declare i32 @llvm.mblaze.fsl.get(i32 %port) +declare i32 @llvm.mblaze.fsl.aget(i32 %port) +declare i32 @llvm.mblaze.fsl.cget(i32 %port) +declare i32 @llvm.mblaze.fsl.caget(i32 %port) +declare i32 @llvm.mblaze.fsl.eget(i32 %port) +declare i32 @llvm.mblaze.fsl.eaget(i32 %port) +declare i32 @llvm.mblaze.fsl.ecget(i32 %port) +declare i32 @llvm.mblaze.fsl.ecaget(i32 %port) +declare i32 @llvm.mblaze.fsl.nget(i32 %port) +declare i32 @llvm.mblaze.fsl.naget(i32 %port) +declare i32 @llvm.mblaze.fsl.ncget(i32 %port) +declare i32 @llvm.mblaze.fsl.ncaget(i32 %port) +declare i32 @llvm.mblaze.fsl.neget(i32 %port) +declare i32 @llvm.mblaze.fsl.neaget(i32 %port) +declare i32 @llvm.mblaze.fsl.necget(i32 %port) +declare i32 @llvm.mblaze.fsl.necaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tget(i32 %port) +declare i32 @llvm.mblaze.fsl.taget(i32 %port) +declare i32 @llvm.mblaze.fsl.tcget(i32 %port) +declare i32 @llvm.mblaze.fsl.tcaget(i32 %port) +declare i32 @llvm.mblaze.fsl.teget(i32 %port) +declare i32 @llvm.mblaze.fsl.teaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tecget(i32 %port) +declare i32 @llvm.mblaze.fsl.tecaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tnget(i32 %port) +declare i32 @llvm.mblaze.fsl.tnaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tncget(i32 %port) +declare i32 @llvm.mblaze.fsl.tncaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tneget(i32 %port) +declare i32 @llvm.mblaze.fsl.tneaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tnecget(i32 %port) +declare i32 @llvm.mblaze.fsl.tnecaget(i32 %port) + +declare void @llvm.mblaze.fsl.put(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.aput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.cput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.caput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.nput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.naput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.ncput(i32 %value, i32 %port) +declare 
void @llvm.mblaze.fsl.ncaput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.tput(i32 %port) +declare void @llvm.mblaze.fsl.taput(i32 %port) +declare void @llvm.mblaze.fsl.tcput(i32 %port) +declare void @llvm.mblaze.fsl.tcaput(i32 %port) +declare void @llvm.mblaze.fsl.tnput(i32 %port) +declare void @llvm.mblaze.fsl.tnaput(i32 %port) +declare void @llvm.mblaze.fsl.tncput(i32 %port) +declare void @llvm.mblaze.fsl.tncaput(i32 %port) + +define i32 @fsl_get(i32 %port) +{ + ; CHECK: fsl_get: + %v0 = call i32 @llvm.mblaze.fsl.get(i32 %port) + ; CHECK: getd + %v1 = call i32 @llvm.mblaze.fsl.aget(i32 %port) + ; CHECK-NEXT: agetd + %v2 = call i32 @llvm.mblaze.fsl.cget(i32 %port) + ; CHECK-NEXT: cgetd + %v3 = call i32 @llvm.mblaze.fsl.caget(i32 %port) + ; CHECK-NEXT: cagetd + %v4 = call i32 @llvm.mblaze.fsl.eget(i32 %port) + ; CHECK-NEXT: egetd + %v5 = call i32 @llvm.mblaze.fsl.eaget(i32 %port) + ; CHECK-NEXT: eagetd + %v6 = call i32 @llvm.mblaze.fsl.ecget(i32 %port) + ; CHECK-NEXT: ecgetd + %v7 = call i32 @llvm.mblaze.fsl.ecaget(i32 %port) + ; CHECK-NEXT: ecagetd + %v8 = call i32 @llvm.mblaze.fsl.nget(i32 %port) + ; CHECK-NEXT: ngetd + %v9 = call i32 @llvm.mblaze.fsl.naget(i32 %port) + ; CHECK-NEXT: nagetd + %v10 = call i32 @llvm.mblaze.fsl.ncget(i32 %port) + ; CHECK-NEXT: ncgetd + %v11 = call i32 @llvm.mblaze.fsl.ncaget(i32 %port) + ; CHECK-NEXT: ncagetd + %v12 = call i32 @llvm.mblaze.fsl.neget(i32 %port) + ; CHECK-NEXT: negetd + %v13 = call i32 @llvm.mblaze.fsl.neaget(i32 %port) + ; CHECK-NEXT: neagetd + %v14 = call i32 @llvm.mblaze.fsl.necget(i32 %port) + ; CHECK-NEXT: necgetd + %v15 = call i32 @llvm.mblaze.fsl.necaget(i32 %port) + ; CHECK-NEXT: necagetd + %v16 = call i32 @llvm.mblaze.fsl.tget(i32 %port) + ; CHECK-NEXT: tgetd + %v17 = call i32 @llvm.mblaze.fsl.taget(i32 %port) + ; CHECK-NEXT: tagetd + %v18 = call i32 @llvm.mblaze.fsl.tcget(i32 %port) + ; CHECK-NEXT: tcgetd + %v19 = call i32 @llvm.mblaze.fsl.tcaget(i32 %port) + ; CHECK-NEXT: tcagetd + %v20 = call i32 
@llvm.mblaze.fsl.teget(i32 %port) + ; CHECK-NEXT: tegetd + %v21 = call i32 @llvm.mblaze.fsl.teaget(i32 %port) + ; CHECK-NEXT: teagetd + %v22 = call i32 @llvm.mblaze.fsl.tecget(i32 %port) + ; CHECK-NEXT: tecgetd + %v23 = call i32 @llvm.mblaze.fsl.tecaget(i32 %port) + ; CHECK-NEXT: tecagetd + %v24 = call i32 @llvm.mblaze.fsl.tnget(i32 %port) + ; CHECK-NEXT: tngetd + %v25 = call i32 @llvm.mblaze.fsl.tnaget(i32 %port) + ; CHECK-NEXT: tnagetd + %v26 = call i32 @llvm.mblaze.fsl.tncget(i32 %port) + ; CHECK-NEXT: tncgetd + %v27 = call i32 @llvm.mblaze.fsl.tncaget(i32 %port) + ; CHECK-NEXT: tncagetd + %v28 = call i32 @llvm.mblaze.fsl.tneget(i32 %port) + ; CHECK-NEXT: tnegetd + %v29 = call i32 @llvm.mblaze.fsl.tneaget(i32 %port) + ; CHECK-NEXT: tneagetd + %v30 = call i32 @llvm.mblaze.fsl.tnecget(i32 %port) + ; CHECK-NEXT: tnecgetd + %v31 = call i32 @llvm.mblaze.fsl.tnecaget(i32 %port) + ; CHECK-NEXT: tnecagetd + ret i32 1 + ; CHECK: rtsd +} + +define i32 @fslc_get() +{ + ; CHECK: fslc_get: + %v0 = call i32 @llvm.mblaze.fsl.get(i32 1) + ; CHECK: get + %v1 = call i32 @llvm.mblaze.fsl.aget(i32 1) + ; CHECK-NOT: agetd + ; CHECK: aget + %v2 = call i32 @llvm.mblaze.fsl.cget(i32 1) + ; CHECK-NOT: cgetd + ; CHECK: cget + %v3 = call i32 @llvm.mblaze.fsl.caget(i32 1) + ; CHECK-NOT: cagetd + ; CHECK: caget + %v4 = call i32 @llvm.mblaze.fsl.eget(i32 1) + ; CHECK-NOT: egetd + ; CHECK: eget + %v5 = call i32 @llvm.mblaze.fsl.eaget(i32 1) + ; CHECK-NOT: eagetd + ; CHECK: eaget + %v6 = call i32 @llvm.mblaze.fsl.ecget(i32 1) + ; CHECK-NOT: ecgetd + ; CHECK: ecget + %v7 = call i32 @llvm.mblaze.fsl.ecaget(i32 1) + ; CHECK-NOT: ecagetd + ; CHECK: ecaget + %v8 = call i32 @llvm.mblaze.fsl.nget(i32 1) + ; CHECK-NOT: ngetd + ; CHECK: nget + %v9 = call i32 @llvm.mblaze.fsl.naget(i32 1) + ; CHECK-NOT: nagetd + ; CHECK: naget + %v10 = call i32 @llvm.mblaze.fsl.ncget(i32 1) + ; CHECK-NOT: ncgetd + ; CHECK: ncget + %v11 = call i32 @llvm.mblaze.fsl.ncaget(i32 1) + ; CHECK-NOT: ncagetd + ; CHECK: ncaget + 
%v12 = call i32 @llvm.mblaze.fsl.neget(i32 1) + ; CHECK-NOT: negetd + ; CHECK: neget + %v13 = call i32 @llvm.mblaze.fsl.neaget(i32 1) + ; CHECK-NOT: neagetd + ; CHECK: neaget + %v14 = call i32 @llvm.mblaze.fsl.necget(i32 1) + ; CHECK-NOT: necgetd + ; CHECK: necget + %v15 = call i32 @llvm.mblaze.fsl.necaget(i32 1) + ; CHECK-NOT: necagetd + ; CHECK: necaget + %v16 = call i32 @llvm.mblaze.fsl.tget(i32 1) + ; CHECK-NOT: tgetd + ; CHECK: tget + %v17 = call i32 @llvm.mblaze.fsl.taget(i32 1) + ; CHECK-NOT: tagetd + ; CHECK: taget + %v18 = call i32 @llvm.mblaze.fsl.tcget(i32 1) + ; CHECK-NOT: tcgetd + ; CHECK: tcget + %v19 = call i32 @llvm.mblaze.fsl.tcaget(i32 1) + ; CHECK-NOT: tcagetd + ; CHECK: tcaget + %v20 = call i32 @llvm.mblaze.fsl.teget(i32 1) + ; CHECK-NOT: tegetd + ; CHECK: teget + %v21 = call i32 @llvm.mblaze.fsl.teaget(i32 1) + ; CHECK-NOT: teagetd + ; CHECK: teaget + %v22 = call i32 @llvm.mblaze.fsl.tecget(i32 1) + ; CHECK-NOT: tecgetd + ; CHECK: tecget + %v23 = call i32 @llvm.mblaze.fsl.tecaget(i32 1) + ; CHECK-NOT: tecagetd + ; CHECK: tecaget + %v24 = call i32 @llvm.mblaze.fsl.tnget(i32 1) + ; CHECK-NOT: tngetd + ; CHECK: tnget + %v25 = call i32 @llvm.mblaze.fsl.tnaget(i32 1) + ; CHECK-NOT: tnagetd + ; CHECK: tnaget + %v26 = call i32 @llvm.mblaze.fsl.tncget(i32 1) + ; CHECK-NOT: tncgetd + ; CHECK: tncget + %v27 = call i32 @llvm.mblaze.fsl.tncaget(i32 1) + ; CHECK-NOT: tncagetd + ; CHECK: tncaget + %v28 = call i32 @llvm.mblaze.fsl.tneget(i32 1) + ; CHECK-NOT: tnegetd + ; CHECK: tneget + %v29 = call i32 @llvm.mblaze.fsl.tneaget(i32 1) + ; CHECK-NOT: tneagetd + ; CHECK: tneaget + %v30 = call i32 @llvm.mblaze.fsl.tnecget(i32 1) + ; CHECK-NOT: tnecgetd + ; CHECK: tnecget + %v31 = call i32 @llvm.mblaze.fsl.tnecaget(i32 1) + ; CHECK-NOT: tnecagetd + ; CHECK: tnecaget + ret i32 1 + ; CHECK: rtsd +} + +define void @putfsl(i32 %value, i32 %port) +{ + ; CHECK: putfsl: + call void @llvm.mblaze.fsl.put(i32 %value, i32 %port) + ; CHECK: putd + call void 
@llvm.mblaze.fsl.aput(i32 %value, i32 %port) + ; CHECK-NEXT: aputd + call void @llvm.mblaze.fsl.cput(i32 %value, i32 %port) + ; CHECK-NEXT: cputd + call void @llvm.mblaze.fsl.caput(i32 %value, i32 %port) + ; CHECK-NEXT: caputd + call void @llvm.mblaze.fsl.nput(i32 %value, i32 %port) + ; CHECK-NEXT: nputd + call void @llvm.mblaze.fsl.naput(i32 %value, i32 %port) + ; CHECK-NEXT: naputd + call void @llvm.mblaze.fsl.ncput(i32 %value, i32 %port) + ; CHECK-NEXT: ncputd + call void @llvm.mblaze.fsl.ncaput(i32 %value, i32 %port) + ; CHECK-NEXT: ncaputd + call void @llvm.mblaze.fsl.tput(i32 %port) + ; CHECK-NEXT: tputd + call void @llvm.mblaze.fsl.taput(i32 %port) + ; CHECK-NEXT: taputd + call void @llvm.mblaze.fsl.tcput(i32 %port) + ; CHECK-NEXT: tcputd + call void @llvm.mblaze.fsl.tcaput(i32 %port) + ; CHECK-NEXT: tcaputd + call void @llvm.mblaze.fsl.tnput(i32 %port) + ; CHECK-NEXT: tnputd + call void @llvm.mblaze.fsl.tnaput(i32 %port) + ; CHECK-NEXT: tnaputd + call void @llvm.mblaze.fsl.tncput(i32 %port) + ; CHECK-NEXT: tncputd + call void @llvm.mblaze.fsl.tncaput(i32 %port) + ; CHECK-NEXT: tncaputd + ret void + ; CHECK: rtsd +} + +define void @putfsl_const(i32 %value) +{ + ; CHECK: putfsl_const: + call void @llvm.mblaze.fsl.put(i32 %value, i32 1) + ; CHECK-NOT: putd + ; CHECK: put + call void @llvm.mblaze.fsl.aput(i32 %value, i32 1) + ; CHECK-NOT: aputd + ; CHECK: aput + call void @llvm.mblaze.fsl.cput(i32 %value, i32 1) + ; CHECK-NOT: cputd + ; CHECK: cput + call void @llvm.mblaze.fsl.caput(i32 %value, i32 1) + ; CHECK-NOT: caputd + ; CHECK: caput + call void @llvm.mblaze.fsl.nput(i32 %value, i32 1) + ; CHECK-NOT: nputd + ; CHECK: nput + call void @llvm.mblaze.fsl.naput(i32 %value, i32 1) + ; CHECK-NOT: naputd + ; CHECK: naput + call void @llvm.mblaze.fsl.ncput(i32 %value, i32 1) + ; CHECK-NOT: ncputd + ; CHECK: ncput + call void @llvm.mblaze.fsl.ncaput(i32 %value, i32 1) + ; CHECK-NOT: ncaputd + ; CHECK: ncaput + call void @llvm.mblaze.fsl.tput(i32 1) + ; CHECK-NOT: 
tputd + ; CHECK: tput + call void @llvm.mblaze.fsl.taput(i32 1) + ; CHECK-NOT: taputd + ; CHECK: taput + call void @llvm.mblaze.fsl.tcput(i32 1) + ; CHECK-NOT: tcputd + ; CHECK: tcput + call void @llvm.mblaze.fsl.tcaput(i32 1) + ; CHECK-NOT: tcaputd + ; CHECK: tcaput + call void @llvm.mblaze.fsl.tnput(i32 1) + ; CHECK-NOT: tnputd + ; CHECK: tnput + call void @llvm.mblaze.fsl.tnaput(i32 1) + ; CHECK-NOT: tnaputd + ; CHECK: tnaput + call void @llvm.mblaze.fsl.tncput(i32 1) + ; CHECK-NOT: tncputd + ; CHECK: tncput + call void @llvm.mblaze.fsl.tncaput(i32 1) + ; CHECK-NOT: tncaputd + ; CHECK: tncaput + ret void + ; CHECK: rtsd +} diff --git a/test/CodeGen/MBlaze/imm.ll b/test/CodeGen/MBlaze/imm.ll new file mode 100644 index 0000000000..85fad175b7 --- /dev/null +++ b/test/CodeGen/MBlaze/imm.ll @@ -0,0 +1,70 @@ +; Ensure that all immediate values that are 32-bits or less can be loaded +; using a single instruction and that immediate values 64-bits or less can +; be loaded using two instructions. 
+; +; RUN: llc < %s -march=mblaze | FileCheck %s +; RUN: llc < %s -march=mblaze -mattr=+fpu | FileCheck -check-prefix=FPU %s + +define i8 @retimm_i8() { + ; CHECK: retimm_i8: + ; CHECK: add + ; CHECK-NEXT: rtsd + ; FPU: retimm_i8: + ; FPU: add + ; FPU-NEXT: rtsd + ret i8 123 +} + +define i16 @retimm_i16() { + ; CHECK: retimm_i16: + ; CHECK: add + ; CHECK-NEXT: rtsd + ; FPU: retimm_i16: + ; FPU: add + ; FPU-NEXT: rtsd + ret i16 38212 +} + +define i32 @retimm_i32() { + ; CHECK: retimm_i32: + ; CHECK: add + ; CHECK-NEXT: rtsd + ; FPU: retimm_i32: + ; FPU: add + ; FPU-NEXT: rtsd + ret i32 2938128 +} + +define i64 @retimm_i64() { + ; CHECK: retimm_i64: + ; CHECK: add + ; CHECK-NEXT: add + ; CHECK-NEXT: rtsd + ; FPU: retimm_i64: + ; FPU: add + ; FPU-NEXT: add + ; FPU-NEXT: rtsd + ret i64 94581823 +} + +define float @retimm_float() { + ; CHECK: retimm_float: + ; CHECK: add + ; CHECK-NEXT: rtsd + ; FPU: retimm_float: + ; FPU: or + ; FPU: rtsd + ret float 12.0 +} + +define double @retimm_double() { + ; CHECK: retimm_double: + ; CHECK: add + ; CHECK-NEXT: add + ; CHECK-NEXT: rtsd + ; FPU: retimm_double: + ; FPU: add + ; FPU-NEXT: add + ; FPU-NEXT: rtsd + ret double 598382.39283873 +} diff --git a/test/CodeGen/MBlaze/jumptable.ll b/test/CodeGen/MBlaze/jumptable.ll new file mode 100644 index 0000000000..3f27c12f19 --- /dev/null +++ b/test/CodeGen/MBlaze/jumptable.ll @@ -0,0 +1,79 @@ +; Ensure that jump tables can be handled by the mblaze backend. The +; jump table should be lowered to a "br" instruction using one of the +; available registers. 
+; +; RUN: llc < %s -march=mblaze | FileCheck %s + +define i32 @jmptable(i32 %arg) +{ + ; CHECK: jmptable: + switch i32 %arg, label %DEFAULT [ i32 0, label %L0 + i32 1, label %L1 + i32 2, label %L2 + i32 3, label %L3 + i32 4, label %L4 + i32 5, label %L5 + i32 6, label %L6 + i32 7, label %L7 + i32 8, label %L8 + i32 9, label %L9 ] + + ; CHECK: lw [[REG:r[0-9]*]] + ; CHECK: br [[REG]] +L0: + %var0 = add i32 %arg, 0 + br label %DONE + +L1: + %var1 = add i32 %arg, 1 + br label %DONE + +L2: + %var2 = add i32 %arg, 2 + br label %DONE + +L3: + %var3 = add i32 %arg, 3 + br label %DONE + +L4: + %var4 = add i32 %arg, 4 + br label %DONE + +L5: + %var5 = add i32 %arg, 5 + br label %DONE + +L6: + %var6 = add i32 %arg, 6 + br label %DONE + +L7: + %var7 = add i32 %arg, 7 + br label %DONE + +L8: + %var8 = add i32 %arg, 8 + br label %DONE + +L9: + %var9 = add i32 %arg, 9 + br label %DONE + +DEFAULT: + unreachable + +DONE: + %rval = phi i32 [ %var0, %L0 ], + [ %var1, %L1 ], + [ %var2, %L2 ], + [ %var3, %L3 ], + [ %var4, %L4 ], + [ %var5, %L5 ], + [ %var6, %L6 ], + [ %var7, %L7 ], + [ %var8, %L8 ], + [ %var9, %L9 ] + ret i32 %rval + ; CHECK: rtsd +} diff --git a/test/CodeGen/MBlaze/loop.ll b/test/CodeGen/MBlaze/loop.ll new file mode 100644 index 0000000000..b473020e66 --- /dev/null +++ b/test/CodeGen/MBlaze/loop.ll @@ -0,0 +1,47 @@ +; Test some complicated looping constructs to ensure that they +; compile successfully and that some sort of branching is used +; in the resulting code. +; +; RUN: llc < %s -march=mblaze -mattr=+mul,+fpu,+barrel | FileCheck %s + +declare i32 @printf(i8*, ...) 
+@MSG = internal constant [19 x i8] c"Message: %d %d %d\0A\00" + +define i32 @loop(i32 %a, i32 %b) +{ + ; CHECK: loop: +entry: + br label %loop_outer + +loop_outer: + %outer.0 = phi i32 [ 0, %entry ], [ %outer.2, %loop_outer_finish ] + br label %loop_inner + +loop_inner: + %inner.0 = phi i32 [ %a, %loop_outer ], [ %inner.3, %loop_inner_finish ] + %inner.1 = phi i32 [ %b, %loop_outer ], [ %inner.4, %loop_inner_finish ] + %inner.2 = phi i32 [ 0, %loop_outer ], [ %inner.5, %loop_inner_finish ] + %inner.3 = add i32 %inner.0, %inner.1 + %inner.4 = mul i32 %inner.2, 11 + br label %loop_inner_finish + +loop_inner_finish: + %inner.5 = add i32 %inner.2, 1 + ; CHECK: addi {{.*, 1}} + + call i32 (i8*,...)* @printf( i8* getelementptr([19 x i8]* @MSG,i32 0,i32 0), + i32 %inner.0, i32 %inner.1, i32 %inner.2 ) + ; CHECK: brlid + + %inner.6 = icmp eq i32 %inner.5, 100 + ; CHECK: cmp + + br i1 %inner.6, label %loop_inner, label %loop_outer_finish + ; CHECK: {{beq|bne}} + +loop_outer_finish: + %outer.1 = add i32 %outer.0, 1 + %outer.2 = urem i32 %outer.1, 1500 + br label %loop_outer + ; CHECK: br +} diff --git a/test/CodeGen/MBlaze/mul.ll b/test/CodeGen/MBlaze/mul.ll new file mode 100644 index 0000000000..65d3e22a3e --- /dev/null +++ b/test/CodeGen/MBlaze/mul.ll @@ -0,0 +1,51 @@ +; Ensure that multiplication is lowered to function calls when the multiplier +; unit is not available in the hardware and that function calls are not used +; when the multiplier unit is available in the hardware. 
+; +; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s +; RUN: llc < %s -march=mblaze -mattr=+mul | FileCheck -check-prefix=MUL %s + +define i8 @test_i8(i8 %a, i8 %b) { + ; FUN: test_i8: + ; MUL: test_i8: + + %tmp.1 = mul i8 %a, %b + ; FUN-NOT: mul + ; FUN: brlid + ; MUL-NOT: brlid + ; MUL: mul + + ret i8 %tmp.1 + ; FUN: rtsd + ; MUL: rtsd +} + +define i16 @test_i16(i16 %a, i16 %b) { + ; FUN: test_i16: + ; MUL: test_i16: + + %tmp.1 = mul i16 %a, %b + ; FUN-NOT: mul + ; FUN: brlid + ; MUL-NOT: brlid + ; MUL: mul + + ret i16 %tmp.1 + ; FUN: rtsd + ; MUL: rtsd +} + +define i32 @test_i32(i32 %a, i32 %b) { + ; FUN: test_i32: + ; MUL: test_i32: + + %tmp.1 = mul i32 %a, %b + ; FUN-NOT: mul + ; FUN: brlid + ; MUL-NOT: brlid + ; MUL: mul + + ret i32 %tmp.1 + ; FUN: rtsd + ; MUL: rtsd +} diff --git a/test/CodeGen/MBlaze/mul64.ll b/test/CodeGen/MBlaze/mul64.ll new file mode 100644 index 0000000000..e0ef4138af --- /dev/null +++ b/test/CodeGen/MBlaze/mul64.ll @@ -0,0 +1,23 @@ +; Ensure that multiplication is lowered to function calls when the 64-bit +; multiplier unit is not available in the hardware and that function calls +; are not used when the 64-bit multiplier unit is available in the hardware. +; +; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s +; RUN: llc < %s -march=mblaze -mattr=+mul,+mul64 | \ +; RUN: FileCheck -check-prefix=MUL %s + +define i64 @test_i64(i64 %a, i64 %b) { + ; FUN: test_i64: + ; MUL: test_i64: + + %tmp.1 = mul i64 %a, %b + ; FUN-NOT: mul + ; FUN: brlid + ; MUL-NOT: brlid + ; MUL: mulh + ; MUL: mul + + ret i64 %tmp.1 + ; FUN: rtsd + ; MUL: rtsd +} diff --git a/test/CodeGen/MBlaze/select.ll b/test/CodeGen/MBlaze/select.ll new file mode 100644 index 0000000000..47a88a1e3c --- /dev/null +++ b/test/CodeGen/MBlaze/select.ll @@ -0,0 +1,15 @@ +; Ensure that the select instruction is supported and is lowered to +; some sort of branch instruction. 
+; +; RUN: llc < %s -march=mblaze | FileCheck %s + +define i32 @testsel(i32 %a, i32 %b) +{ + ; CHECK: testsel: + %tmp.1 = icmp eq i32 %a, %b + ; CHECK: cmp + %tmp.2 = select i1 %tmp.1, i32 %a, i32 %b + ; CHECK: {{bne|beq}} + ret i32 %tmp.2 + ; CHECK: rtsd +} diff --git a/test/CodeGen/MBlaze/shift.ll b/test/CodeGen/MBlaze/shift.ll new file mode 100644 index 0000000000..186115ec19 --- /dev/null +++ b/test/CodeGen/MBlaze/shift.ll @@ -0,0 +1,117 @@ +; Ensure that shifts are lowered to loops when the barrel shifter unit is +; not available in the hardware and that loops are not used when the +; barrel shifter unit is available in the hardware. +; +; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s +; RUN: llc < %s -march=mblaze -mattr=+barrel | FileCheck -check-prefix=SHT %s + +define i8 @test_i8(i8 %a, i8 %b) { + ; FUN: test_i8: + ; SHT: test_i8: + + %tmp.1 = shl i8 %a, %b + ; FUN-NOT: bsll + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: andi + ; SHT-NOT: bnei + ; SHT: bsll + + ret i8 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd +} + +define i8 @testc_i8(i8 %a, i8 %b) { + ; FUN: testc_i8: + ; SHT: testc_i8: + + %tmp.1 = shl i8 %a, 5 + ; FUN-NOT: bsll + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: andi + ; SHT-NOT: add + ; SHT-NOT: bnei + ; SHT: bslli + + ret i8 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd +} + +define i16 @test_i16(i16 %a, i16 %b) { + ; FUN: test_i16: + ; SHT: test_i16: + + %tmp.1 = shl i16 %a, %b + ; FUN-NOT: bsll + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: andi + ; SHT-NOT: bnei + ; SHT: bsll + + ret i16 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd +} + +define i16 @testc_i16(i16 %a, i16 %b) { + ; FUN: testc_i16: + ; SHT: testc_i16: + + %tmp.1 = shl i16 %a, 5 + ; FUN-NOT: bsll + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: andi + ; SHT-NOT: add + ; SHT-NOT: bnei + ; SHT: bslli + + ret i16 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd +} + +define i32 @test_i32(i32 %a, i32 %b) { + ; FUN: test_i32: + ; SHT: test_i32: + + %tmp.1 = shl i32 %a, %b + ; 
FUN-NOT: bsll + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: andi + ; SHT-NOT: bnei + ; SHT: bsll + + ret i32 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd +} + +define i32 @testc_i32(i32 %a, i32 %b) { + ; FUN: testc_i32: + ; SHT: testc_i32: + + %tmp.1 = shl i32 %a, 5 + ; FUN-NOT: bsll + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: andi + ; SHT-NOT: add + ; SHT-NOT: bnei + ; SHT: bslli + + ret i32 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd +} diff --git a/test/CodeGen/MSP430/AddrMode-bis-rx.ll b/test/CodeGen/MSP430/AddrMode-bis-rx.ll index 3340494f6b..4f9a7248bb 100644 --- a/test/CodeGen/MSP430/AddrMode-bis-rx.ll +++ b/test/CodeGen/MSP430/AddrMode-bis-rx.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s +; RUN: llc < %s -march=msp430 | FileCheck %s target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16" target triple = "msp430-generic-generic" @@ -29,7 +29,7 @@ define i8 @am3(i8 %x, i16 %n) nounwind { ret i8 %3 } ; CHECK: am3: -; CHECK: bis.b &bar(r14), r15 +; CHECK: bis.b bar(r14), r15 define i16 @am4(i16 %x) nounwind { %1 = volatile load i16* inttoptr(i16 32 to i16*) @@ -70,5 +70,5 @@ define i8 @am7(i8 %x, i16 %n) nounwind { ret i8 %4 } ; CHECK: am7: -; CHECK: bis.b &duh+2(r14), r15 +; CHECK: bis.b duh+2(r14), r15 diff --git a/test/CodeGen/MSP430/AddrMode-bis-xr.ll b/test/CodeGen/MSP430/AddrMode-bis-xr.ll index ca79fb6d33..17ebd87368 100644 --- a/test/CodeGen/MSP430/AddrMode-bis-xr.ll +++ b/test/CodeGen/MSP430/AddrMode-bis-xr.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s +; RUN: llc < %s -march=msp430 | FileCheck %s target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:16" target triple = "msp430-generic-generic" @@ -32,7 +32,7 @@ define void @am3(i16 %i, i8 %x) nounwind { ret void } ; CHECK: am3: -; CHECK: bis.b r14, &bar(r15) +; CHECK: bis.b r14, bar(r15) define void @am4(i16 %x) nounwind { %1 = volatile load i16* inttoptr(i16 32 to i16*) @@ -77,5 +77,5 @@ define void @am7(i16 %n, i8 %x) nounwind 
{ ret void } ; CHECK: am7: -; CHECK: bis.b r14, &duh+2(r15) +; CHECK: bis.b r14, duh+2(r15) diff --git a/test/CodeGen/MSP430/AddrMode-mov-rx.ll b/test/CodeGen/MSP430/AddrMode-mov-rx.ll index 67cbb021c8..6676b88cd1 100644 --- a/test/CodeGen/MSP430/AddrMode-mov-rx.ll +++ b/test/CodeGen/MSP430/AddrMode-mov-rx.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s +; RUN: llc < %s -march=msp430 | FileCheck %s target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16" target triple = "msp430-generic-generic" @@ -26,7 +26,7 @@ define i8 @am3(i16 %n) nounwind { ret i8 %2 } ; CHECK: am3: -; CHECK: mov.b &bar(r15), r15 +; CHECK: mov.b bar(r15), r15 define i16 @am4() nounwind { %1 = volatile load i16* inttoptr(i16 32 to i16*) @@ -63,5 +63,5 @@ define i8 @am7(i16 %n) nounwind { ret i8 %3 } ; CHECK: am7: -; CHECK: mov.b &duh+2(r15), r15 +; CHECK: mov.b duh+2(r15), r15 diff --git a/test/CodeGen/MSP430/AddrMode-mov-xr.ll b/test/CodeGen/MSP430/AddrMode-mov-xr.ll index b8155d3a55..4b327b0578 100644 --- a/test/CodeGen/MSP430/AddrMode-mov-xr.ll +++ b/test/CodeGen/MSP430/AddrMode-mov-xr.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s +; RUN: llc < %s -march=msp430 | FileCheck %s target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16" target triple = "msp430-generic-generic" @@ -26,7 +26,7 @@ define void @am3(i16 %i, i8 %a) nounwind { ret void } ; CHECK: am3: -; CHECK: mov.b r14, &bar(r15) +; CHECK: mov.b r14, bar(r15) define void @am4(i16 %a) nounwind { volatile store i16 %a, i16* inttoptr(i16 32 to i16*) @@ -63,5 +63,5 @@ define void @am7(i16 %n, i8 %a) nounwind { ret void } ; CHECK: am7: -; CHECK: mov.b r14, &duh+2(r15) +; CHECK: mov.b r14, duh+2(r15) diff --git a/test/CodeGen/MSP430/Inst16mm.ll b/test/CodeGen/MSP430/Inst16mm.ll index 510afe3734..2337c2c0f2 100644 --- a/test/CodeGen/MSP430/Inst16mm.ll +++ b/test/CodeGen/MSP430/Inst16mm.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=msp430 < %s | FileCheck %s +; RUN: llc 
-march=msp430 -combiner-alias-analysis < %s | FileCheck %s target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" target triple = "msp430-generic-generic" @foo = common global i16 0, align 2 @@ -52,3 +52,18 @@ define void @xor() nounwind { ret void } +define i16 @mov2() nounwind { +entry: + %retval = alloca i16 ; <i16*> [#uses=3] + %x = alloca i32, align 2 ; <i32*> [#uses=1] + %y = alloca i32, align 2 ; <i32*> [#uses=1] + store i16 0, i16* %retval + %tmp = load i32* %y ; <i32> [#uses=1] + store i32 %tmp, i32* %x + store i16 0, i16* %retval + %0 = load i16* %retval ; <i16> [#uses=1] + ret i16 %0 +; CHECK: mov2: +; CHECK: mov.w 0(r1), 4(r1) +; CHECK: mov.w 2(r1), 6(r1) +} diff --git a/test/CodeGen/MSP430/Inst8rr.ll b/test/CodeGen/MSP430/Inst8rr.ll index 74feaae4eb..0f5fc12b62 100644 --- a/test/CodeGen/MSP430/Inst8rr.ll +++ b/test/CodeGen/MSP430/Inst8rr.ll @@ -10,7 +10,7 @@ define i8 @mov(i8 %a, i8 %b) nounwind { define i8 @add(i8 %a, i8 %b) nounwind { ; CHECK: add: -; CHECK: add.b r14, r15 +; CHECK: add.b r12, r15 %1 = add i8 %a, %b ret i8 %1 } diff --git a/test/CodeGen/MSP430/bit.ll b/test/CodeGen/MSP430/bit.ll index cd664a17bf..03d672bcbe 100644 --- a/test/CodeGen/MSP430/bit.ll +++ b/test/CodeGen/MSP430/bit.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s +; RUN: llc < %s -march=msp430 | FileCheck %s target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:32" target triple = "msp430-generic-generic" diff --git a/test/CodeGen/MSP430/setcc.ll b/test/CodeGen/MSP430/setcc.ll index 9db51cce73..c99b17e143 100644 --- a/test/CodeGen/MSP430/setcc.ll +++ b/test/CodeGen/MSP430/setcc.ll @@ -10,9 +10,9 @@ define i16 @sccweqand(i16 %a, i16 %b) nounwind { } ; CHECK: sccweqand: ; CHECK: bit.w r14, r15 -; CHECK-NEXT: mov.w r2, r15 -; CHECK-NEXT: and.w #1, r15 -; CHECK-NEXT: xor.w #1, r15 +; CHECK: mov.w r2, r15 +; CHECK: rra.w r15 +; CHECK: and.w #1, r15 define i16 @sccwneand(i16 %a, i16 %b) nounwind { %t1 = and i16 %a, %b @@ -22,8 +22,8 @@ 
define i16 @sccwneand(i16 %a, i16 %b) nounwind { } ; CHECK: sccwneand: ; CHECK: bit.w r14, r15 -; CHECK-NEXT: mov.w r2, r15 -; CHECK-NEXT: and.w #1, r15 +; CHECK: mov.w r2, r15 +; CHECK: and.w #1, r15 define i16 @sccwne(i16 %a, i16 %b) nounwind { %t1 = icmp ne i16 %a, %b @@ -32,9 +32,10 @@ define i16 @sccwne(i16 %a, i16 %b) nounwind { } ; CHECK:sccwne: ; CHECK: cmp.w r14, r15 -; CHECK-NEXT: mov.w r2, r15 -; CHECK-NEXT: rra.w r15 -; CHECK-NEXT: and.w #1, r15 +; CHECK: mov.w r2, r15 +; CHECK: rra.w r15 +; CHECK: and.w #1, r15 +; CHECK: xor.w #1, r15 define i16 @sccweq(i16 %a, i16 %b) nounwind { %t1 = icmp eq i16 %a, %b @@ -43,10 +44,9 @@ define i16 @sccweq(i16 %a, i16 %b) nounwind { } ; CHECK:sccweq: ; CHECK: cmp.w r14, r15 -; CHECK-NEXT: mov.w r2, r15 -; CHECK-NEXT: rra.w r15 -; CHECK-NEXT: and.w #1, r15 -; CHECK-NEXT: xor.w #1, r15 +; CHECK: mov.w r2, r15 +; CHECK: rra.w r15 +; CHECK: and.w #1, r15 define i16 @sccwugt(i16 %a, i16 %b) nounwind { %t1 = icmp ugt i16 %a, %b @@ -55,9 +55,9 @@ define i16 @sccwugt(i16 %a, i16 %b) nounwind { } ; CHECK:sccwugt: ; CHECK: cmp.w r15, r14 -; CHECK-NEXT: mov.w r2, r15 -; CHECK-NEXT: and.w #1, r15 -; CHECK-NEXT: xor.w #1, r15 +; CHECK: mov.w r2, r15 +; CHECK: and.w #1, r15 +; CHECK: xor.w #1, r15 define i16 @sccwuge(i16 %a, i16 %b) nounwind { %t1 = icmp uge i16 %a, %b @@ -66,8 +66,8 @@ define i16 @sccwuge(i16 %a, i16 %b) nounwind { } ; CHECK:sccwuge: ; CHECK: cmp.w r14, r15 -; CHECK-NEXT: mov.w r2, r15 -; CHECK-NEXT: and.w #1, r15 +; CHECK: mov.w r2, r15 +; CHECK: and.w #1, r15 define i16 @sccwult(i16 %a, i16 %b) nounwind { %t1 = icmp ult i16 %a, %b @@ -76,9 +76,9 @@ define i16 @sccwult(i16 %a, i16 %b) nounwind { } ; CHECK:sccwult: ; CHECK: cmp.w r14, r15 -; CHECK-NEXT: mov.w r2, r15 -; CHECK-NEXT: and.w #1, r15 -; CHECK-NEXT: xor.w #1, r15 +; CHECK: mov.w r2, r15 +; CHECK: and.w #1, r15 +; CHECK: xor.w #1, r15 define i16 @sccwule(i16 %a, i16 %b) nounwind { %t1 = icmp ule i16 %a, %b @@ -87,8 +87,8 @@ define i16 @sccwule(i16 %a, 
i16 %b) nounwind { } ; CHECK:sccwule: ; CHECK: cmp.w r15, r14 -; CHECK-NEXT: mov.w r2, r15 -; CHECK-NEXT: and.w #1, r15 +; CHECK: mov.w r2, r15 +; CHECK: and.w #1, r15 define i16 @sccwsgt(i16 %a, i16 %b) nounwind { %t1 = icmp sgt i16 %a, %b diff --git a/test/CodeGen/PIC16/C16-11.ll b/test/CodeGen/PIC16/C16-11.ll index e70092b11c..8a5a0ac11f 100644 --- a/test/CodeGen/PIC16/C16-11.ll +++ b/test/CodeGen/PIC16/C16-11.ll @@ -1,4 +1,7 @@ -;RUN: llc < %s -march=pic16 +; RUN: llc < %s -march=pic16 +; XFAIL: * +; This fails because PIC16 doesn't define a (xor reg, reg) pattern. +; @c612.auto.a.b = internal global i1 false ; <i1*> [#uses=2] @c612.auto.A.b = internal global i1 false ; <i1*> [#uses=2] diff --git a/test/CodeGen/PIC16/C16-15.ll b/test/CodeGen/PIC16/C16-15.ll index 2e1dc0c013..5ca2d4a9bd 100644 --- a/test/CodeGen/PIC16/C16-15.ll +++ b/test/CodeGen/PIC16/C16-15.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=pic16 | grep "extern @.lib.unordered.f32" | count 3 +; RUN: llc < %s -march=pic16 | grep "extern" | grep "@.lib.unordered.f32" | count 3 @pc = global i8* inttoptr (i64 160 to i8*), align 1 ; <i8**> [#uses=2] @aa = common global i16 0, align 1 ; <i16*> [#uses=0] diff --git a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll index d1d28ae15b..be28a9a454 100644 --- a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll +++ b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s | grep {subfc r3,r5,r4} -; RUN: llc < %s | grep {subfze r4,r2} -; RUN: llc < %s -regalloc=local | grep {subfc r2,r5,r4} +; RUN: llc < %s | grep {subfze r4,r6} +; RUN: llc < %s -regalloc=local | grep {subfc r6,r5,r4} ; RUN: llc < %s -regalloc=local | grep {subfze r3,r3} ; The first argument of subfc must not be the same as any other register. 
diff --git a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll index 5d09696933..50a02781fd 100644 --- a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll +++ b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 | FileCheck %s +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin10 -mcpu=g5 | FileCheck %s ; ModuleID = '<stdin>' target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" target triple = "powerpc-apple-darwin10.0" @@ -10,8 +10,8 @@ target triple = "powerpc-apple-darwin10.0" define void @foo(i32 %y) nounwind ssp { entry: ; CHECK: foo -; CHECK: add r2 -; CHECK: 0(r2) +; CHECK: add r4 +; CHECK: 0(r4) %y_addr = alloca i32 ; <i32*> [#uses=2] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] store i32 %y, i32* %y_addr diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll new file mode 100644 index 0000000000..b73382e6eb --- /dev/null +++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck %s +; ModuleID = 'hh.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" +target triple = "powerpc-apple-darwin9.6" +; This formerly used R0 for both the stack address and CR. 
+ +define void @foo() nounwind { +entry: +;CHECK: mfcr r2 +;CHECK: rlwinm r2, r2, 8, 0, 31 +;CHECK: lis r0, 1 +;CHECK: ori r0, r0, 34540 +;CHECK: stwx r2, r1, r0 + %x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1] + call void @bar(i8* %x1) nounwind + call void asm sideeffect "", "~{cr2}"() nounwind + br label %return + +return: ; preds = %entry +;CHECK: lis r0, 1 +;CHECK: ori r0, r0, 34540 +;CHECK: lwzx r2, r1, r0 +;CHECK: rlwinm r2, r2, 24, 0, 31 +;CHECK: mtcrf 32, r2 + ret void +} + +declare void @bar(i8*) diff --git a/test/CodeGen/PowerPC/2010-02-26-FoldFloats.ll b/test/CodeGen/PowerPC/2010-02-26-FoldFloats.ll new file mode 100644 index 0000000000..f43f5cae6e --- /dev/null +++ b/test/CodeGen/PowerPC/2010-02-26-FoldFloats.ll @@ -0,0 +1,433 @@ +; RUN: llc < %s -O3 | FileCheck %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" +target triple = "powerpc-apple-darwin9.6" + +; There should be no stfs spills +; CHECK: main: +; CHECK-NOT: stfs +; CHECK: .section + +@.str66 = external constant [3 x i8], align 4 ; <[3 x i8]*> [#uses=1] +@.str31 = external constant [6 x i8], align 4 ; <[6 x i8]*> [#uses=1] +@.str61 = external constant [21 x i8], align 4 ; <[21 x i8]*> [#uses=1] +@.str101 = external constant [61 x i8], align 4 ; <[61 x i8]*> [#uses=1] +@.str104 = external constant [31 x i8], align 4 ; <[31 x i8]*> [#uses=1] +@.str105 = external constant [45 x i8], align 4 ; <[45 x i8]*> [#uses=1] +@.str112 = external constant [38 x i8], align 4 ; <[38 x i8]*> [#uses=1] +@.str121 = external constant [36 x i8], align 4 ; <[36 x i8]*> [#uses=1] +@.str12293 = external constant [67 x i8], align 4 ; <[67 x i8]*> [#uses=1] +@.str123 = external constant [68 x i8], align 4 ; <[68 x i8]*> [#uses=1] +@.str124 = external constant [52 x i8], align 4 ; <[52 x i8]*> 
[#uses=1] +@.str125 = external constant [51 x i8], align 4 ; <[51 x i8]*> [#uses=1] + +define i32 @main(i32 %argc, i8** %argv) noreturn nounwind { +entry: + br i1 undef, label %bb4.i1, label %my_fopen.exit + +bb4.i1: ; preds = %entry + unreachable + +my_fopen.exit: ; preds = %entry + br i1 undef, label %bb.i, label %bb1.i + +bb.i: ; preds = %my_fopen.exit + unreachable + +bb1.i: ; preds = %my_fopen.exit + br label %bb134.i + +bb2.i: ; preds = %bb134.i + %0 = icmp eq i32 undef, 0 ; <i1> [#uses=1] + br i1 %0, label %bb20.i, label %bb21.i + +bb20.i: ; preds = %bb2.i + br label %bb134.i + +bb21.i: ; preds = %bb2.i + %1 = call i32 @strcmp(i8* undef, i8* getelementptr inbounds ([6 x i8]* @.str31, i32 0, i32 0)) nounwind readonly ; <i32> [#uses=0] + br i1 undef, label %bb30.i, label %bb31.i + +bb30.i: ; preds = %bb21.i + br label %bb134.i + +bb31.i: ; preds = %bb21.i + br i1 undef, label %bb41.i, label %bb44.i + +bb41.i: ; preds = %bb31.i + %2 = icmp slt i32 undef, %argc ; <i1> [#uses=1] + br i1 %2, label %bb1.i77.i, label %bb2.i78.i + +bb1.i77.i: ; preds = %bb41.i + %3 = load float* undef, align 4 ; <float> [#uses=2] + %4 = fcmp ugt float %3, 0.000000e+00 ; <i1> [#uses=1] + br i1 %4, label %bb43.i, label %bb42.i + +bb2.i78.i: ; preds = %bb41.i + unreachable + +bb42.i: ; preds = %bb1.i77.i + unreachable + +bb43.i: ; preds = %bb1.i77.i + br label %bb134.i + +bb44.i: ; preds = %bb31.i + br i1 undef, label %bb45.i, label %bb49.i + +bb45.i: ; preds = %bb44.i + %5 = icmp slt i32 undef, %argc ; <i1> [#uses=1] + br i1 %5, label %bb1.i72.i, label %bb2.i73.i + +bb1.i72.i: ; preds = %bb45.i + %6 = load float* undef, align 4 ; <float> [#uses=3] + %7 = fcmp ult float %6, 1.000000e+00 ; <i1> [#uses=1] + %or.cond.i = and i1 undef, %7 ; <i1> [#uses=1] + br i1 %or.cond.i, label %bb48.i, label %bb47.i + +bb2.i73.i: ; preds = %bb45.i + unreachable + +bb47.i: ; preds = %bb1.i72.i + unreachable + +bb48.i: ; preds = %bb1.i72.i + br label %bb134.i + +bb49.i: ; preds = %bb44.i + br i1 undef, 
label %bb50.i, label %bb53.i + +bb50.i: ; preds = %bb49.i + br i1 false, label %bb1.i67.i, label %bb2.i68.i + +bb1.i67.i: ; preds = %bb50.i + br i1 false, label %read_float_option.exit69.i, label %bb1.i67.bb2.i68_crit_edge.i + +bb1.i67.bb2.i68_crit_edge.i: ; preds = %bb1.i67.i + br label %bb2.i68.i + +bb2.i68.i: ; preds = %bb1.i67.bb2.i68_crit_edge.i, %bb50.i + unreachable + +read_float_option.exit69.i: ; preds = %bb1.i67.i + br i1 undef, label %bb52.i, label %bb51.i + +bb51.i: ; preds = %read_float_option.exit69.i + unreachable + +bb52.i: ; preds = %read_float_option.exit69.i + br label %bb134.i + +bb53.i: ; preds = %bb49.i + %8 = call i32 @strcmp(i8* undef, i8* getelementptr inbounds ([21 x i8]* @.str61, i32 0, i32 0)) nounwind readonly ; <i32> [#uses=0] + br i1 false, label %bb89.i, label %bb92.i + +bb89.i: ; preds = %bb53.i + br i1 undef, label %bb1.i27.i, label %bb2.i28.i + +bb1.i27.i: ; preds = %bb89.i + unreachable + +bb2.i28.i: ; preds = %bb89.i + unreachable + +bb92.i: ; preds = %bb53.i + br i1 undef, label %bb93.i, label %bb96.i + +bb93.i: ; preds = %bb92.i + br i1 undef, label %bb1.i22.i, label %bb2.i23.i + +bb1.i22.i: ; preds = %bb93.i + br i1 undef, label %bb95.i, label %bb94.i + +bb2.i23.i: ; preds = %bb93.i + unreachable + +bb94.i: ; preds = %bb1.i22.i + unreachable + +bb95.i: ; preds = %bb1.i22.i + br label %bb134.i + +bb96.i: ; preds = %bb92.i + br i1 undef, label %bb97.i, label %bb100.i + +bb97.i: ; preds = %bb96.i + %9 = icmp slt i32 undef, %argc ; <i1> [#uses=1] + br i1 %9, label %bb1.i17.i, label %bb2.i18.i + +bb1.i17.i: ; preds = %bb97.i + %10 = call i32 (i8*, i8*, ...)* @"\01_sscanf$LDBL128"(i8* undef, i8* getelementptr inbounds ([3 x i8]* @.str66, i32 0, i32 0), float* undef) nounwind ; <i32> [#uses=1] + %phitmp.i16.i = icmp eq i32 %10, 1 ; <i1> [#uses=1] + br i1 %phitmp.i16.i, label %read_float_option.exit19.i, label %bb1.i17.bb2.i18_crit_edge.i + +bb1.i17.bb2.i18_crit_edge.i: ; preds = %bb1.i17.i + br label %bb2.i18.i + +bb2.i18.i: ; preds 
= %bb1.i17.bb2.i18_crit_edge.i, %bb97.i + unreachable + +read_float_option.exit19.i: ; preds = %bb1.i17.i + br i1 false, label %bb99.i, label %bb98.i + +bb98.i: ; preds = %read_float_option.exit19.i + unreachable + +bb99.i: ; preds = %read_float_option.exit19.i + br label %bb134.i + +bb100.i: ; preds = %bb96.i + br i1 false, label %bb101.i, label %bb104.i + +bb101.i: ; preds = %bb100.i + br i1 false, label %bb1.i12.i, label %bb2.i13.i + +bb1.i12.i: ; preds = %bb101.i + br i1 undef, label %bb102.i, label %bb103.i + +bb2.i13.i: ; preds = %bb101.i + unreachable + +bb102.i: ; preds = %bb1.i12.i + unreachable + +bb103.i: ; preds = %bb1.i12.i + br label %bb134.i + +bb104.i: ; preds = %bb100.i + unreachable + +bb134.i: ; preds = %bb103.i, %bb99.i, %bb95.i, %bb52.i, %bb48.i, %bb43.i, %bb30.i, %bb20.i, %bb1.i + %annealing_sched.1.0 = phi float [ 1.000000e+01, %bb1.i ], [ %annealing_sched.1.0, %bb20.i ], [ 1.000000e+00, %bb30.i ], [ %annealing_sched.1.0, %bb43.i ], [ %annealing_sched.1.0, %bb48.i ], [ %annealing_sched.1.0, %bb52.i ], [ %annealing_sched.1.0, %bb95.i ], [ %annealing_sched.1.0, %bb99.i ], [ %annealing_sched.1.0, %bb103.i ] ; <float> [#uses=8] + %annealing_sched.2.0 = phi float [ 1.000000e+02, %bb1.i ], [ %annealing_sched.2.0, %bb20.i ], [ %annealing_sched.2.0, %bb30.i ], [ %3, %bb43.i ], [ %annealing_sched.2.0, %bb48.i ], [ %annealing_sched.2.0, %bb52.i ], [ %annealing_sched.2.0, %bb95.i ], [ %annealing_sched.2.0, %bb99.i ], [ %annealing_sched.2.0, %bb103.i ] ; <float> [#uses=8] + %annealing_sched.3.0 = phi float [ 0x3FE99999A0000000, %bb1.i ], [ %annealing_sched.3.0, %bb20.i ], [ %annealing_sched.3.0, %bb30.i ], [ %annealing_sched.3.0, %bb43.i ], [ %6, %bb48.i ], [ %annealing_sched.3.0, %bb52.i ], [ %annealing_sched.3.0, %bb95.i ], [ %annealing_sched.3.0, %bb99.i ], [ %annealing_sched.3.0, %bb103.i ] ; <float> [#uses=8] + %annealing_sched.4.0 = phi float [ 0x3F847AE140000000, %bb1.i ], [ %annealing_sched.4.0, %bb20.i ], [ %annealing_sched.4.0, %bb30.i ], [ 
%annealing_sched.4.0, %bb43.i ], [ %annealing_sched.4.0, %bb48.i ], [ 0.000000e+00, %bb52.i ], [ %annealing_sched.4.0, %bb95.i ], [ %annealing_sched.4.0, %bb99.i ], [ %annealing_sched.4.0, %bb103.i ] ; <float> [#uses=8] + %router_opts.0.0 = phi float [ 0.000000e+00, %bb1.i ], [ %router_opts.0.0, %bb20.i ], [ 1.000000e+04, %bb30.i ], [ %router_opts.0.0, %bb43.i ], [ %router_opts.0.0, %bb48.i ], [ %router_opts.0.0, %bb52.i ], [ %router_opts.0.0, %bb95.i ], [ %router_opts.0.0, %bb99.i ], [ %router_opts.0.0, %bb103.i ] ; <float> [#uses=8] + %router_opts.1.0 = phi float [ 5.000000e-01, %bb1.i ], [ %router_opts.1.0, %bb20.i ], [ 1.000000e+04, %bb30.i ], [ %router_opts.1.0, %bb43.i ], [ %router_opts.1.0, %bb48.i ], [ %router_opts.1.0, %bb52.i ], [ undef, %bb95.i ], [ %router_opts.1.0, %bb99.i ], [ %router_opts.1.0, %bb103.i ] ; <float> [#uses=7] + %router_opts.2.0 = phi float [ 1.500000e+00, %bb1.i ], [ %router_opts.2.0, %bb20.i ], [ %router_opts.2.0, %bb30.i ], [ %router_opts.2.0, %bb43.i ], [ %router_opts.2.0, %bb48.i ], [ %router_opts.2.0, %bb52.i ], [ %router_opts.2.0, %bb95.i ], [ undef, %bb99.i ], [ %router_opts.2.0, %bb103.i ] ; <float> [#uses=8] + %router_opts.3.0 = phi float [ 0x3FC99999A0000000, %bb1.i ], [ %router_opts.3.0, %bb20.i ], [ %router_opts.3.0, %bb30.i ], [ %router_opts.3.0, %bb43.i ], [ %router_opts.3.0, %bb48.i ], [ %router_opts.3.0, %bb52.i ], [ %router_opts.3.0, %bb95.i ], [ %router_opts.3.0, %bb99.i ], [ 0.000000e+00, %bb103.i ] ; <float> [#uses=8] + %11 = phi float [ 0x3FC99999A0000000, %bb1.i ], [ %11, %bb20.i ], [ %11, %bb30.i ], [ %11, %bb43.i ], [ %11, %bb48.i ], [ %11, %bb52.i ], [ %11, %bb95.i ], [ %11, %bb99.i ], [ 0.000000e+00, %bb103.i ] ; <float> [#uses=8] + %12 = phi float [ 1.500000e+00, %bb1.i ], [ %12, %bb20.i ], [ %12, %bb30.i ], [ %12, %bb43.i ], [ %12, %bb48.i ], [ %12, %bb52.i ], [ %12, %bb95.i ], [ undef, %bb99.i ], [ %12, %bb103.i ] ; <float> [#uses=8] + %13 = phi float [ 5.000000e-01, %bb1.i ], [ %13, %bb20.i ], [ 
1.000000e+04, %bb30.i ], [ %13, %bb43.i ], [ %13, %bb48.i ], [ %13, %bb52.i ], [ undef, %bb95.i ], [ %13, %bb99.i ], [ %13, %bb103.i ] ; <float> [#uses=7] + %14 = phi float [ 0.000000e+00, %bb1.i ], [ %14, %bb20.i ], [ 1.000000e+04, %bb30.i ], [ %14, %bb43.i ], [ %14, %bb48.i ], [ %14, %bb52.i ], [ %14, %bb95.i ], [ %14, %bb99.i ], [ %14, %bb103.i ] ; <float> [#uses=8] + %15 = phi float [ 0x3FE99999A0000000, %bb1.i ], [ %15, %bb20.i ], [ %15, %bb30.i ], [ %15, %bb43.i ], [ %6, %bb48.i ], [ %15, %bb52.i ], [ %15, %bb95.i ], [ %15, %bb99.i ], [ %15, %bb103.i ] ; <float> [#uses=8] + %16 = phi float [ 0x3F847AE140000000, %bb1.i ], [ %16, %bb20.i ], [ %16, %bb30.i ], [ %16, %bb43.i ], [ %16, %bb48.i ], [ 0.000000e+00, %bb52.i ], [ %16, %bb95.i ], [ %16, %bb99.i ], [ %16, %bb103.i ] ; <float> [#uses=8] + %17 = phi float [ 1.000000e+01, %bb1.i ], [ %17, %bb20.i ], [ 1.000000e+00, %bb30.i ], [ %17, %bb43.i ], [ %17, %bb48.i ], [ %17, %bb52.i ], [ %17, %bb95.i ], [ %17, %bb99.i ], [ %17, %bb103.i ] ; <float> [#uses=8] + %18 = icmp slt i32 undef, %argc ; <i1> [#uses=1] + br i1 %18, label %bb2.i, label %bb135.i + +bb135.i: ; preds = %bb134.i + br i1 undef, label %bb141.i, label %bb142.i + +bb141.i: ; preds = %bb135.i + unreachable + +bb142.i: ; preds = %bb135.i + br i1 undef, label %bb145.i, label %bb144.i + +bb144.i: ; preds = %bb142.i + unreachable + +bb145.i: ; preds = %bb142.i + br i1 undef, label %bb146.i, label %bb147.i + +bb146.i: ; preds = %bb145.i + unreachable + +bb147.i: ; preds = %bb145.i + br i1 undef, label %bb148.i, label %bb155.i + +bb148.i: ; preds = %bb147.i + br label %bb155.i + +bb155.i: ; preds = %bb148.i, %bb147.i + br i1 undef, label %bb156.i, label %bb161.i + +bb156.i: ; preds = %bb155.i + unreachable + +bb161.i: ; preds = %bb155.i + br i1 undef, label %bb162.i, label %bb163.i + +bb162.i: ; preds = %bb161.i + %19 = fpext float %17 to double ; <double> [#uses=1] + %20 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([61 x i8]* 
@.str101, i32 0, i32 0), double %19) nounwind ; <i32> [#uses=0] + unreachable + +bb163.i: ; preds = %bb161.i + %21 = fpext float %16 to double ; <double> [#uses=1] + %22 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([31 x i8]* @.str104, i32 0, i32 0), double %21) nounwind ; <i32> [#uses=0] + %23 = fpext float %15 to double ; <double> [#uses=1] + %24 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([45 x i8]* @.str105, i32 0, i32 0), double %23) nounwind ; <i32> [#uses=0] + %25 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([38 x i8]* @.str112, i32 0, i32 0), double undef) nounwind ; <i32> [#uses=0] + br i1 undef, label %parse_command.exit, label %bb176.i + +bb176.i: ; preds = %bb163.i + br i1 undef, label %bb177.i, label %bb178.i + +bb177.i: ; preds = %bb176.i + unreachable + +bb178.i: ; preds = %bb176.i + %26 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([36 x i8]* @.str121, i32 0, i32 0), double undef) nounwind ; <i32> [#uses=0] + %27 = fpext float %14 to double ; <double> [#uses=1] + %28 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([67 x i8]* @.str12293, i32 0, i32 0), double %27) nounwind ; <i32> [#uses=0] + %29 = fpext float %13 to double ; <double> [#uses=1] + %30 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([68 x i8]* @.str123, i32 0, i32 0), double %29) nounwind ; <i32> [#uses=0] + %31 = fpext float %12 to double ; <double> [#uses=1] + %32 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([52 x i8]* @.str124, i32 0, i32 0), double %31) nounwind ; <i32> [#uses=0] + %33 = fpext float %11 to double ; <double> [#uses=1] + %34 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([51 x i8]* @.str125, i32 0, i32 0), double %33) nounwind ; <i32> [#uses=0] + unreachable + +parse_command.exit: ; preds = %bb163.i + br i1 undef, label %bb4.i152.i, label %my_fopen.exit.i 
+ +bb4.i152.i: ; preds = %parse_command.exit + unreachable + +my_fopen.exit.i: ; preds = %parse_command.exit + br i1 undef, label %bb.i6.i99, label %bb49.preheader.i.i + +bb.i6.i99: ; preds = %my_fopen.exit.i + br i1 undef, label %bb3.i.i100, label %bb1.i8.i + +bb1.i8.i: ; preds = %bb.i6.i99 + unreachable + +bb3.i.i100: ; preds = %bb.i6.i99 + unreachable + +bb49.preheader.i.i: ; preds = %my_fopen.exit.i + br i1 undef, label %bb7.i11.i, label %bb50.i.i + +bb7.i11.i: ; preds = %bb49.preheader.i.i + unreachable + +bb50.i.i: ; preds = %bb49.preheader.i.i + br i1 undef, label %bb.i.i.i20.i, label %my_calloc.exit.i.i.i + +bb.i.i.i20.i: ; preds = %bb50.i.i + unreachable + +my_calloc.exit.i.i.i: ; preds = %bb50.i.i + br i1 undef, label %bb.i.i37.i.i, label %alloc_hash_table.exit.i21.i + +bb.i.i37.i.i: ; preds = %my_calloc.exit.i.i.i + unreachable + +alloc_hash_table.exit.i21.i: ; preds = %my_calloc.exit.i.i.i + br i1 undef, label %bb51.i.i, label %bb3.i23.i.i + +bb51.i.i: ; preds = %alloc_hash_table.exit.i21.i + unreachable + +bb3.i23.i.i: ; preds = %alloc_hash_table.exit.i21.i + br i1 undef, label %bb.i8.i.i, label %bb.nph.i.i + +bb.nph.i.i: ; preds = %bb3.i23.i.i + unreachable + +bb.i8.i.i: ; preds = %bb3.i.i34.i, %bb3.i23.i.i + br i1 undef, label %bb3.i.i34.i, label %bb1.i.i32.i + +bb1.i.i32.i: ; preds = %bb.i8.i.i + unreachable + +bb3.i.i34.i: ; preds = %bb.i8.i.i + br i1 undef, label %free_hash_table.exit.i.i, label %bb.i8.i.i + +free_hash_table.exit.i.i: ; preds = %bb3.i.i34.i + br i1 undef, label %check_netlist.exit.i, label %bb59.i.i + +bb59.i.i: ; preds = %free_hash_table.exit.i.i + unreachable + +check_netlist.exit.i: ; preds = %free_hash_table.exit.i.i + br label %bb.i.i3.i + +bb.i.i3.i: ; preds = %bb3.i.i4.i, %check_netlist.exit.i + br i1 false, label %bb3.i.i4.i, label %bb1.i.i.i122 + +bb1.i.i.i122: ; preds = %bb1.i.i.i122, %bb.i.i3.i + br i1 false, label %bb3.i.i4.i, label %bb1.i.i.i122 + +bb3.i.i4.i: ; preds = %bb1.i.i.i122, %bb.i.i3.i + br i1 undef, label 
%read_net.exit, label %bb.i.i3.i + +read_net.exit: ; preds = %bb3.i.i4.i + br i1 undef, label %bb.i44, label %bb3.i47 + +bb.i44: ; preds = %read_net.exit + unreachable + +bb3.i47: ; preds = %read_net.exit + br i1 false, label %bb9.i50, label %bb8.i49 + +bb8.i49: ; preds = %bb3.i47 + unreachable + +bb9.i50: ; preds = %bb3.i47 + br i1 undef, label %bb11.i51, label %bb12.i52 + +bb11.i51: ; preds = %bb9.i50 + unreachable + +bb12.i52: ; preds = %bb9.i50 + br i1 undef, label %bb.i.i53, label %my_malloc.exit.i54 + +bb.i.i53: ; preds = %bb12.i52 + unreachable + +my_malloc.exit.i54: ; preds = %bb12.i52 + br i1 undef, label %bb.i2.i55, label %my_malloc.exit3.i56 + +bb.i2.i55: ; preds = %my_malloc.exit.i54 + unreachable + +my_malloc.exit3.i56: ; preds = %my_malloc.exit.i54 + br i1 undef, label %bb.i.i.i57, label %my_malloc.exit.i.i + +bb.i.i.i57: ; preds = %my_malloc.exit3.i56 + unreachable + +my_malloc.exit.i.i: ; preds = %my_malloc.exit3.i56 + br i1 undef, label %bb, label %bb10 + +bb: ; preds = %my_malloc.exit.i.i + unreachable + +bb10: ; preds = %my_malloc.exit.i.i + br i1 false, label %bb12, label %bb11 + +bb11: ; preds = %bb10 + unreachable + +bb12: ; preds = %bb10 + store float %annealing_sched.1.0, float* null, align 4 + store float %annealing_sched.2.0, float* undef, align 8 + store float %annealing_sched.3.0, float* undef, align 4 + store float %annealing_sched.4.0, float* undef, align 8 + store float %router_opts.0.0, float* undef, align 8 + store float %router_opts.1.0, float* undef, align 4 + store float %router_opts.2.0, float* null, align 8 + store float %router_opts.3.0, float* undef, align 4 + br i1 undef, label %place_and_route.exit, label %bb7.i22 + +bb7.i22: ; preds = %bb12 + br i1 false, label %bb8.i23, label %bb9.i26 + +bb8.i23: ; preds = %bb7.i22 + unreachable + +bb9.i26: ; preds = %bb7.i22 + unreachable + +place_and_route.exit: ; preds = %bb12 + unreachable +} + +declare i32 @"\01_printf$LDBL128"(i8*, ...) 
nounwind + +declare i32 @strcmp(i8* nocapture, i8* nocapture) nounwind readonly + +declare i32 @"\01_sscanf$LDBL128"(i8*, i8*, ...) nounwind diff --git a/test/CodeGen/PowerPC/Frames-alloca.ll b/test/CodeGen/PowerPC/Frames-alloca.ll index aed4fdbb2d..466ae80341 100644 --- a/test/CodeGen/PowerPC/Frames-alloca.ll +++ b/test/CodeGen/PowerPC/Frames-alloca.ll @@ -24,7 +24,7 @@ ; CHECK-PPC64-NOFP: ld r1, 0(r1) ; CHECK-PPC64-NOFP: ld r31, -8(r1) -define i32* @f1(i32 %n) { +define i32* @f1(i32 %n) nounwind { %tmp = alloca i32, i32 %n ; <i32*> [#uses=1] ret i32* %tmp } diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll index 0f7acacbfa..b10a996867 100644 --- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll +++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll @@ -3,14 +3,14 @@ ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \ ; RUN: grep {stw r3, 32751} ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \ -; RUN: grep {std r2, 9024} +; RUN: grep {std r3, 9024} -define void @test() { +define void @test() nounwind { store i32 0, i32* inttoptr (i64 48725999 to i32*) ret void } -define void @test2() { +define void @test2() nounwind { store i64 0, i64* inttoptr (i64 74560 to i64*) ret void } diff --git a/test/CodeGen/PowerPC/addc.ll b/test/CodeGen/PowerPC/addc.ll index 09a7fbd7a6..8c928ce8bc 100644 --- a/test/CodeGen/PowerPC/addc.ll +++ b/test/CodeGen/PowerPC/addc.ll @@ -1,26 +1,33 @@ ; All of these should be codegen'd without loading immediates -; RUN: llc < %s -march=ppc32 -o %t -; RUN: grep addc %t | count 1 -; RUN: grep adde %t | count 1 -; RUN: grep addze %t | count 1 -; RUN: grep addme %t | count 1 -; RUN: grep addic %t | count 2 +; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck %s -define i64 @add_ll(i64 %a, i64 %b) { +define i64 @add_ll(i64 %a, i64 %b) nounwind { entry: %tmp.2 = add i64 %b, %a ; <i64> [#uses=1] ret i64 %tmp.2 +; CHECK: add_ll: +; CHECK: addc r4, r6, r4 +; CHECK: adde r3, r5, r3 +; CHECK: 
blr } -define i64 @add_l_5(i64 %a) { +define i64 @add_l_5(i64 %a) nounwind { entry: %tmp.1 = add i64 %a, 5 ; <i64> [#uses=1] ret i64 %tmp.1 +; CHECK: add_l_5: +; CHECK: addic r4, r4, 5 +; CHECK: addze r3, r3 +; CHECK: blr } -define i64 @add_l_m5(i64 %a) { +define i64 @add_l_m5(i64 %a) nounwind { entry: %tmp.1 = add i64 %a, -5 ; <i64> [#uses=1] ret i64 %tmp.1 +; CHECK: add_l_m5: +; CHECK: addic r4, r4, -5 +; CHECK: addme r3, r3 +; CHECK: blr } diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll index fbc7bd2264..2094e10a58 100644 --- a/test/CodeGen/PowerPC/indirectbr.ll +++ b/test/CodeGen/PowerPC/indirectbr.ll @@ -43,13 +43,13 @@ L2: ; preds = %L3, %bb2 L1: ; preds = %L2, %bb2 %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i32> [#uses=1] -; PIC: addis r4, r2, ha16(L_BA4__foo_L5-"L1$pb") -; PIC: li r5, lo16(L_BA4__foo_L5-"L1$pb") -; PIC: add r4, r4, r5 -; PIC: stw r4 -; STATIC: li r2, lo16(L_BA4__foo_L5) -; STATIC: addis r2, r2, ha16(L_BA4__foo_L5) -; STATIC: stw r2 +; PIC: addis r5, r4, ha16(L_BA4__foo_L5-"L1$pb") +; PIC: li r6, lo16(L_BA4__foo_L5-"L1$pb") +; PIC: add r5, r5, r6 +; PIC: stw r5 +; STATIC: li r4, lo16(L_BA4__foo_L5) +; STATIC: addis r4, r4, ha16(L_BA4__foo_L5) +; STATIC: stw r4 store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4 ret i32 %res.3 } diff --git a/test/CodeGen/PowerPC/lsr-postinc-pos.ll b/test/CodeGen/PowerPC/lsr-postinc-pos.ll new file mode 100644 index 0000000000..f441e42da2 --- /dev/null +++ b/test/CodeGen/PowerPC/lsr-postinc-pos.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -print-lsr-output |& FileCheck %s + +; The icmp is a post-inc use, and the increment is in %bb11, but the +; scevgep needs to be inserted in %bb so that it is dominated by %t. 
+ +; CHECK: %t = load i8** undef +; CHECK: %scevgep = getelementptr i8* %t, i32 %lsr.iv.next +; CHECK: %c1 = icmp ult i8* %scevgep, undef + +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" +target triple = "powerpc-apple-darwin9" + +define void @foo() nounwind { +entry: + br label %bb11 + +bb11: + %i = phi i32 [ 0, %entry ], [ %i.next, %bb ] ; <i32> [#uses=3] + %ii = shl i32 %i, 2 ; <i32> [#uses=1] + %c0 = icmp eq i32 %i, undef ; <i1> [#uses=1] + br i1 %c0, label %bb13, label %bb + +bb: + %t = load i8** undef, align 16 ; <i8*> [#uses=1] + %p = getelementptr i8* %t, i32 %ii ; <i8*> [#uses=1] + %c1 = icmp ult i8* %p, undef ; <i1> [#uses=1] + %i.next = add i32 %i, 1 ; <i32> [#uses=1] + br i1 %c1, label %bb11, label %bb13 + +bb13: + unreachable +} diff --git a/test/CodeGen/PowerPC/mem_update.ll b/test/CodeGen/PowerPC/mem_update.ll index b267719421..17e7e2849c 100644 --- a/test/CodeGen/PowerPC/mem_update.ll +++ b/test/CodeGen/PowerPC/mem_update.ll @@ -3,66 +3,66 @@ ; RUN: llc < %s -march=ppc64 -enable-ppc-preinc | \ ; RUN: not grep addi -@Glob = global i64 4 ; <i64*> [#uses=2] +@Glob = global i64 4 -define i32* @test0(i32* %X, i32* %dest) { - %Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2] - %A = load i32* %Y ; <i32> [#uses=1] +define i32* @test0(i32* %X, i32* %dest) nounwind { + %Y = getelementptr i32* %X, i32 4 + %A = load i32* %Y store i32 %A, i32* %dest ret i32* %Y } -define i32* @test1(i32* %X, i32* %dest) { - %Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2] - %A = load i32* %Y ; <i32> [#uses=1] +define i32* @test1(i32* %X, i32* %dest) nounwind { + %Y = getelementptr i32* %X, i32 4 + %A = load i32* %Y store i32 %A, i32* %dest ret i32* %Y } -define i16* @test2(i16* %X, i32* %dest) { - %Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2] - %A = load i16* %Y ; <i16> [#uses=1] - %B = sext i16 %A to i32 ; <i32> [#uses=1] +define i16* @test2(i16* %X, i32* %dest) 
nounwind { + %Y = getelementptr i16* %X, i32 4 + %A = load i16* %Y + %B = sext i16 %A to i32 store i32 %B, i32* %dest ret i16* %Y } -define i16* @test3(i16* %X, i32* %dest) { - %Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2] - %A = load i16* %Y ; <i16> [#uses=1] - %B = zext i16 %A to i32 ; <i32> [#uses=1] +define i16* @test3(i16* %X, i32* %dest) nounwind { + %Y = getelementptr i16* %X, i32 4 + %A = load i16* %Y + %B = zext i16 %A to i32 store i32 %B, i32* %dest ret i16* %Y } -define i16* @test3a(i16* %X, i64* %dest) { - %Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2] - %A = load i16* %Y ; <i16> [#uses=1] - %B = sext i16 %A to i64 ; <i64> [#uses=1] +define i16* @test3a(i16* %X, i64* %dest) nounwind { + %Y = getelementptr i16* %X, i32 4 + %A = load i16* %Y + %B = sext i16 %A to i64 store i64 %B, i64* %dest ret i16* %Y } -define i64* @test4(i64* %X, i64* %dest) { - %Y = getelementptr i64* %X, i32 4 ; <i64*> [#uses=2] - %A = load i64* %Y ; <i64> [#uses=1] +define i64* @test4(i64* %X, i64* %dest) nounwind { + %Y = getelementptr i64* %X, i32 4 + %A = load i64* %Y store i64 %A, i64* %dest ret i64* %Y } -define i16* @test5(i16* %X) { - %Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2] +define i16* @test5(i16* %X) nounwind { + %Y = getelementptr i16* %X, i32 4 store i16 7, i16* %Y ret i16* %Y } -define i64* @test6(i64* %X, i64 %A) { - %Y = getelementptr i64* %X, i32 4 ; <i64*> [#uses=2] +define i64* @test6(i64* %X, i64 %A) nounwind { + %Y = getelementptr i64* %X, i32 4 store i64 %A, i64* %Y ret i64* %Y } -define i64* @test7(i64* %X, i64 %A) { +define i64* @test7(i64* %X, i64 %A) nounwind { store i64 %A, i64* @Glob ret i64* @Glob } diff --git a/test/CodeGen/PowerPC/retaddr.ll b/test/CodeGen/PowerPC/retaddr.ll index 9f8647d087..cf16b4c26f 100644 --- a/test/CodeGen/PowerPC/retaddr.ll +++ b/test/CodeGen/PowerPC/retaddr.ll @@ -4,7 +4,7 @@ target triple = "powerpc-apple-darwin8" -define void @foo(i8** %X) { +define void @foo(i8** %X) nounwind { entry: %tmp = tail 
call i8* @llvm.returnaddress( i32 0 ) ; <i8*> [#uses=1] store i8* %tmp, i8** %X, align 4 diff --git a/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll b/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll new file mode 100644 index 0000000000..363f5719d1 --- /dev/null +++ b/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll @@ -0,0 +1,76 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" + +define arm_apcscc i32 @test(i32 %n) nounwind { +; CHECK: test: +; CHECK-NOT: mov +; CHECK: return +entry: + %0 = icmp eq i32 %n, 1 ; <i1> [#uses=1] + br i1 %0, label %return, label %bb.nph + +bb.nph: ; preds = %entry + %tmp = add i32 %n, -1 ; <i32> [#uses=1] + br label %bb + +bb: ; preds = %bb.nph, %bb + %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=1] + %u.05 = phi i64 [ undef, %bb.nph ], [ %ins, %bb ] ; <i64> [#uses=1] + %1 = tail call arm_apcscc i32 @f() nounwind ; <i32> [#uses=1] + %tmp4 = zext i32 %1 to i64 ; <i64> [#uses=1] + %mask = and i64 %u.05, -4294967296 ; <i64> [#uses=1] + %ins = or i64 %tmp4, %mask ; <i64> [#uses=2] + tail call arm_apcscc void @g(i64 %ins) nounwind + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %tmp ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret i32 undef +} + +define arm_apcscc i32 @test_dead_cycle(i32 %n) nounwind { +; CHECK: test_dead_cycle: +; CHECK: blx +; CHECK-NOT: mov +; CHECK: blx +entry: + %0 = icmp eq i32 %n, 1 ; <i1> [#uses=1] + br i1 %0, label %return, label %bb.nph + +bb.nph: ; preds = %entry + %tmp = add i32 %n, -1 ; <i32> [#uses=2] + br label %bb + +bb: ; preds = %bb.nph, %bb2 + %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb2 ] ; <i32> [#uses=2] + %u.17 = phi i64 [ undef, %bb.nph ], [ %u.0, %bb2 ] ; <i64> [#uses=2] + %tmp9 = sub i32 %tmp, %indvar ; <i32> [#uses=1] + %1 = 
icmp sgt i32 %tmp9, 1 ; <i1> [#uses=1] + br i1 %1, label %bb1, label %bb2 + +bb1: ; preds = %bb + %2 = tail call arm_apcscc i32 @f() nounwind ; <i32> [#uses=1] + %tmp6 = zext i32 %2 to i64 ; <i64> [#uses=1] + %mask = and i64 %u.17, -4294967296 ; <i64> [#uses=1] + %ins = or i64 %tmp6, %mask ; <i64> [#uses=1] + tail call arm_apcscc void @g(i64 %ins) nounwind + br label %bb2 + +bb2: ; preds = %bb1, %bb +; also check for duplicate induction variables (radar 7645034) +; CHECK: subs r{{.*}}, #1 +; CHECK-NOT: subs r{{.*}}, #1 +; CHECK: pop + %u.0 = phi i64 [ %ins, %bb1 ], [ %u.17, %bb ] ; <i64> [#uses=2] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %tmp ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb2, %entry + ret i32 undef +} + +declare arm_apcscc i32 @f() + +declare arm_apcscc void @g(i64) diff --git a/test/CodeGen/Thumb2/2010-02-24-BigStack.ll b/test/CodeGen/Thumb2/2010-02-24-BigStack.ll new file mode 100644 index 0000000000..533546bb19 --- /dev/null +++ b/test/CodeGen/Thumb2/2010-02-24-BigStack.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -O0 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 -mattr=+vfp2 +; This test creates a big stack frame without spilling any callee-saved registers. +; Make sure the whole stack frame is addrerssable wiothout scavenger crashes. 
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin3.0.0-iphoneos" + +define arm_apcscc void @FindMin(double* %panelTDEL, i8* %dclOfRow, i32 %numRows, i32 %numCols, double* %retMin_RES_TDEL) { +entry: + %panelTDEL.addr = alloca double*, align 4 ; <double**> [#uses=1] + %panelResTDEL = alloca [2560 x double], align 4 ; <[2560 x double]*> [#uses=0] + store double* %panelTDEL, double** %panelTDEL.addr + store double* %retMin_RES_TDEL, double** undef + store i32 0, i32* undef + unreachable +} diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll index 8f6449e8ff..2b20931979 100644 --- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll +++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 7 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 3 define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind { entry: diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll index 47d85b1aa0..f007b5c697 100644 --- a/test/CodeGen/Thumb2/ldr-str-imm12.ll +++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll @@ -52,7 +52,7 @@ bb420: ; preds = %bb20, %bb20 ; CHECK: str r{{[0-7]}}, [sp] ; CHECK: str r{{[0-7]}}, [sp, #+4] ; CHECK: str r{{[0-7]}}, [sp, #+8] -; CHECK: str r{{[0-7]}}, [sp, #+24] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #+24] store %union.rec* null, %union.rec** @zz_hold, align 4 store %union.rec* null, %union.rec** @zz_res, align 4 store %union.rec* %x, %union.rec** @zz_hold, align 4 diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll index 7b1b57a786..ac2cd34e4b 100644 --- a/test/CodeGen/Thumb2/lsr-deficiency.ll +++ b/test/CodeGen/Thumb2/lsr-deficiency.ll @@ -1,25 +1,29 @@ ; RUN: llc < %s 
-mtriple=thumbv7-apple-darwin10 -relocation-model=pic | FileCheck %s ; rdar://7387640 -; FIXME: We still need to rewrite array reference iv of stride -4 with loop -; count iv of stride -1. +; This now reduces to a single induction variable. + +; TODO: It still gets a GPR shuffle at the end of the loop +; This is because something in instruction selection has decided +; that comparing the pre-incremented value with zero is better +; than comparing the post-incremented value with -4. @G = external global i32 ; <i32*> [#uses=2] @array = external global i32* ; <i32**> [#uses=1] define arm_apcscc void @t() nounwind optsize { ; CHECK: t: -; CHECK: mov.w r2, #4000 -; CHECK: movw r3, #1001 +; CHECK: mov.w r2, #1000 entry: %.pre = load i32* @G, align 4 ; <i32> [#uses=1] br label %bb bb: ; preds = %bb, %entry ; CHECK: LBB1_1: -; CHECK: subs r3, #1 -; CHECK: cmp r3, #0 -; CHECK: sub.w r2, r2, #4 +; CHECK: cmp r2, #0 +; CHECK: sub.w r9, r2, #1 +; CHECK: mov r2, r9 + %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1] %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] %tmp5 = sub i32 1000, %indvar ; <i32> [#uses=1] diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll index 71199abc57..1d267565e0 100644 --- a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll +++ b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s -define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { +define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK: t1: ; CHECK: it ne ; CHECK: cmpne @@ -20,12 +20,12 @@ cond_next: } ; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt. 
-define i32 @t2(i32 %a, i32 %b) { +define i32 @t2(i32 %a, i32 %b) nounwind { entry: ; CHECK: t2: -; CHECK: ite le -; CHECK: suble +; CHECK: ite gt ; CHECK: subgt +; CHECK: suble %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1] br i1 %tmp1434, label %bb17, label %bb.outer @@ -60,14 +60,14 @@ bb17: ; preds = %cond_false, %cond_true, %entry @x = external global i32* ; <i32**> [#uses=1] -define void @foo(i32 %a) { +define void @foo(i32 %a) nounwind { entry: %tmp = load i32** @x ; <i32*> [#uses=1] store i32 %a, i32* %tmp ret void } -define void @t3(i32 %a, i32 %b) { +define void @t3(i32 %a, i32 %b) nounwind { entry: ; CHECK: t3: ; CHECK: it lt diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll index 7935163761..ff178b42fb 100644 --- a/test/CodeGen/Thumb2/thumb2-spill-q.ll +++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll @@ -12,8 +12,8 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly define arm_apcscc void @aaa(%quuz* %this, i8* %block) { ; CHECK: aaa: ; CHECK: bic r4, r4, #15 -; CHECK: vst1.64 {{.*}}[r{{.*}}, :128] -; CHECK: vld1.64 {{.*}}[r{{.*}}, :128] +; CHECK: vst1.64 {{.*}}[{{.*}}, :128] +; CHECK: vld1.64 {{.*}}[{{.*}}, :128] entry: %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] store float 6.300000e+01, float* undef, align 4 diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll index 4e23f5356c..91598cdc96 100644 --- a/test/CodeGen/Thumb2/thumb2-uxtb.ll +++ b/test/CodeGen/Thumb2/thumb2-uxtb.ll @@ -2,14 +2,14 @@ define i32 @test1(i32 %x) { ; CHECK: test1 -; CHECK: uxtb16.w r0, r0 +; CHECK: uxtb16 r0, r0 %tmp1 = and i32 %x, 16711935 ; <i32> [#uses=1] ret i32 %tmp1 } define i32 @test2(i32 %x) { ; CHECK: test2 -; CHECK: uxtb16.w r0, r0, ror #8 +; CHECK: uxtb16 r0, r0, ror #8 %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1] %tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1] ret i32 %tmp2 @@ -17,7 +17,7 @@ define i32 @test2(i32 %x) { define i32 
@test3(i32 %x) { ; CHECK: test3 -; CHECK: uxtb16.w r0, r0, ror #8 +; CHECK: uxtb16 r0, r0, ror #8 %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1] %tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1] ret i32 %tmp2 @@ -25,7 +25,7 @@ define i32 @test3(i32 %x) { define i32 @test4(i32 %x) { ; CHECK: test4 -; CHECK: uxtb16.w r0, r0, ror #8 +; CHECK: uxtb16 r0, r0, ror #8 %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1] %tmp6 = and i32 %tmp1, 16711935 ; <i32> [#uses=1] ret i32 %tmp6 @@ -33,7 +33,7 @@ define i32 @test4(i32 %x) { define i32 @test5(i32 %x) { ; CHECK: test5 -; CHECK: uxtb16.w r0, r0, ror #8 +; CHECK: uxtb16 r0, r0, ror #8 %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1] %tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1] ret i32 %tmp2 @@ -41,7 +41,7 @@ define i32 @test5(i32 %x) { define i32 @test6(i32 %x) { ; CHECK: test6 -; CHECK: uxtb16.w r0, r0, ror #16 +; CHECK: uxtb16 r0, r0, ror #16 %tmp1 = lshr i32 %x, 16 ; <i32> [#uses=1] %tmp2 = and i32 %tmp1, 255 ; <i32> [#uses=1] %tmp4 = shl i32 %x, 16 ; <i32> [#uses=1] @@ -52,7 +52,7 @@ define i32 @test6(i32 %x) { define i32 @test7(i32 %x) { ; CHECK: test7 -; CHECK: uxtb16.w r0, r0, ror #16 +; CHECK: uxtb16 r0, r0, ror #16 %tmp1 = lshr i32 %x, 16 ; <i32> [#uses=1] %tmp2 = and i32 %tmp1, 255 ; <i32> [#uses=1] %tmp4 = shl i32 %x, 16 ; <i32> [#uses=1] @@ -63,7 +63,7 @@ define i32 @test7(i32 %x) { define i32 @test8(i32 %x) { ; CHECK: test8 -; CHECK: uxtb16.w r0, r0, ror #24 +; CHECK: uxtb16 r0, r0, ror #24 %tmp1 = shl i32 %x, 8 ; <i32> [#uses=1] %tmp2 = and i32 %tmp1, 16711680 ; <i32> [#uses=1] %tmp5 = lshr i32 %x, 24 ; <i32> [#uses=1] @@ -73,7 +73,7 @@ define i32 @test8(i32 %x) { define i32 @test9(i32 %x) { ; CHECK: test9 -; CHECK: uxtb16.w r0, r0, ror #24 +; CHECK: uxtb16 r0, r0, ror #24 %tmp1 = lshr i32 %x, 24 ; <i32> [#uses=1] %tmp4 = shl i32 %x, 8 ; <i32> [#uses=1] %tmp5 = and i32 %tmp4, 16711680 ; <i32> [#uses=1] @@ -86,7 +86,7 @@ define i32 @test10(i32 %p0) { ; CHECK: mov.w r1, #16253176 ; CHECK: and.w r0, r1, r0, lsr #7 ; CHECK: lsrs r1, 
r0, #5 -; CHECK: uxtb16.w r1, r1 +; CHECK: uxtb16 r1, r1 ; CHECK: orr.w r0, r1, r0 %tmp1 = lshr i32 %p0, 7 ; <i32> [#uses=1] diff --git a/test/CodeGen/X86/2005-01-17-CycleInDAG.ll b/test/CodeGen/X86/2005-01-17-CycleInDAG.ll index 32fafc61e8..fe6674da04 100644 --- a/test/CodeGen/X86/2005-01-17-CycleInDAG.ll +++ b/test/CodeGen/X86/2005-01-17-CycleInDAG.ll @@ -7,7 +7,7 @@ @GLOBAL = external global i32 ; <i32*> [#uses=1] -define i32 @test(i32* %P1, i32* %P2, i32* %P3) { +define i32 @test(i32* %P1, i32* %P2, i32* %P3) nounwind { %L = load i32* @GLOBAL ; <i32> [#uses=1] store i32 12, i32* %P2 %Y = load i32* %P3 ; <i32> [#uses=1] diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll index bdbe713a29..56d6aa960e 100644 --- a/test/CodeGen/X86/2006-05-11-InstrSched.ll +++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\ -; RUN: grep {asm-printer} | grep 31 +; RUN: grep {asm-printer} | grep 34 target datalayout = "e-p:32:32" define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind { @@ -40,7 +40,7 @@ cond_true: ; preds = %cond_true, %entry %tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1] store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7 %tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1] - %tmp.upgrd.8 = icmp slt i32 %tmp147, %M ; <i1> [#uses=1] + %tmp.upgrd.8 = icmp ne i32 %tmp147, %M ; <i1> [#uses=1] %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] br i1 %tmp.upgrd.8, label %cond_true, label %return diff --git a/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll b/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll index bf9fa5782b..d09d061476 100644 --- a/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll +++ b/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll @@ -5,7 +5,7 @@ target datalayout = "e-p:32:32" target triple = 
"i686-apple-darwin8.7.2" -define <4 x float> @test(<4 x float> %A, <4 x float>* %B) { +define <4 x float> @test(<4 x float> %A, <4 x float>* %B) nounwind { %BV = load <4 x float>* %B ; <<4 x float>> [#uses=1] %tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %A, <4 x float> %BV ) ; <<4 x float>> [#uses=1] ret <4 x float> %tmp28 diff --git a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll index 4cac9b4c4a..e1f890192d 100644 --- a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll +++ b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86 -mtriple=i686-darwin | \ ; RUN: grep push | count 3 -define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) { +define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) nounwind { entry: icmp sgt i32 %size, 0 ; <i1>:0 [#uses=1] br i1 %0, label %bb.preheader, label %return diff --git a/test/CodeGen/X86/2007-10-05-3AddrConvert.ll b/test/CodeGen/X86/2007-10-05-3AddrConvert.ll index 67323e87ef..2c2706de5d 100644 --- a/test/CodeGen/X86/2007-10-05-3AddrConvert.ll +++ b/test/CodeGen/X86/2007-10-05-3AddrConvert.ll @@ -36,7 +36,9 @@ bb.i6.i: ; preds = %bb.i6.i, %stepsystem.exit.i bb107.i.i: ; preds = %bb107.i.i, %bb.i6.i %q_addr.0.i.i.in = phi %struct.bnode** [ null, %bb107.i.i ], [ %4, %bb.i6.i ] ; <%struct.bnode**> [#uses=1] - %q_addr.0.i.i = load %struct.bnode** %q_addr.0.i.i.in ; <%struct.bnode*> [#uses=0] + %q_addr.0.i.i = load %struct.bnode** %q_addr.0.i.i.in ; <%struct.bnode*> [#uses=1] + %q_addr.1 = getelementptr %struct.anon* %0, i32 0, i32 4, i32 1 + store %struct.bnode* %q_addr.0.i.i, %struct.bnode** %q_addr.1, align 4 br label %bb107.i.i bb47.loopexit.i: ; preds = %bb32.i diff --git a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll index 721d4c945b..8e315f4d80 100644 --- a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll +++ b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll @@ -35,7 +35,7 @@ 
cond_next36.i: ; preds = %cond_next.i bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i ; CHECK: %bb.i28.i ; CHECK: addl $2 -; CHECK: addl $2 +; CHECK: addl $-2 %j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2] %din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1] %tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2] diff --git a/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/test/CodeGen/X86/2008-02-22-ReMatBug.ll index 8d6bb0df1f..a91ac27f98 100644 --- a/test/CodeGen/X86/2008-02-22-ReMatBug.ll +++ b/test/CodeGen/X86/2008-02-22-ReMatBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 3 +; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 2 ; rdar://5761454 %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } diff --git a/test/CodeGen/X86/2008-07-11-SpillerBug.ll b/test/CodeGen/X86/2008-07-11-SpillerBug.ll index cd99c0e53c..548b44db6d 100644 --- a/test/CodeGen/X86/2008-07-11-SpillerBug.ll +++ b/test/CodeGen/X86/2008-07-11-SpillerBug.ll @@ -1,9 +1,7 @@ ; RUN: llc < %s -march=x86 -relocation-model=static -disable-fp-elim -post-RA-scheduler=false -asm-verbose=0 | FileCheck %s ; PR2536 - -; CHECK: movw %cx -; CHECK-NEXT: andl $65534, % +; CHECK: andl $65534, % ; CHECK-NEXT: movl % ; CHECK-NEXT: movl $17 diff --git a/test/CodeGen/X86/2008-08-05-SpillerBug.ll b/test/CodeGen/X86/2008-08-05-SpillerBug.ll index 67e14ffae5..4c6493445a 100644 --- a/test/CodeGen/X86/2008-08-05-SpillerBug.ll +++ b/test/CodeGen/X86/2008-08-05-SpillerBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 58 +; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 55 ; PR2568 @g_3 = 
external global i16 ; <i16*> [#uses=1] diff --git a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll index 72c7ee93a9..0dca14d064 100644 --- a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll +++ b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll @@ -3,74 +3,83 @@ ; PR3538 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9" - %llvm.dbg.anchor.type = type { i32, i32 } - %llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 } - %llvm.dbg.block.type = type { i32, { }* } - %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* } - %llvm.dbg.composite.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }*, { }* } - %llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 } - %llvm.dbg.subrange.type = type { i32, i64, i64 } - %llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* } -@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1] -@.str = internal constant [4 x i8] c"t.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1] -@.str1 = internal constant [2 x i8] c".\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1] -@.str2 = internal constant [6 x i8] c"clang\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1] -@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([2 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str2, i32 0, i32 0), i1 false, i1 false, i8* null }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -@.str3 = internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*> 
[#uses=1] -@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1] -@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1] -@.str4 = internal constant [5 x i8] c"test\00", section "llvm.metadata" ; <[5 x i8]*> [#uses=1] -@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([5 x i8]* @.str4, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str4, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 3, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -@.str5 = internal constant [2 x i8] c"X\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1] -@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([2 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 3, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1] -@llvm.dbg.block = internal constant %llvm.dbg.block.type { i32 458763, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*) }, section "llvm.metadata" ; <%llvm.dbg.block.type*> [#uses=1] -@llvm.dbg.subrange = internal constant 
%llvm.dbg.subrange.type { i32 458785, i64 0, i64 0 }, section "llvm.metadata" ; <%llvm.dbg.subrange.type*> [#uses=1] -@llvm.dbg.array = internal constant [1 x { }*] [{ }* bitcast (%llvm.dbg.subrange.type* @llvm.dbg.subrange to { }*)], section "llvm.metadata" ; <[1 x { }*]*> [#uses=1] -@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458753, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 0, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast ([1 x { }*]* @llvm.dbg.array to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -@.str6 = internal constant [2 x i8] c"Y\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1] -@llvm.dbg.variable7 = internal constant %llvm.dbg.variable.type { i32 459008, { }* bitcast (%llvm.dbg.block.type* @llvm.dbg.block to { }*), i8* getelementptr ([2 x i8]* @.str6, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1] - -define i32 @test(i32 %X) nounwind { +define signext i8 @foo(i8* %s1) nounwind ssp { entry: - %retval = alloca i32 ; <i32*> [#uses=1] - %X.addr = alloca i32 ; <i32*> [#uses=3] - %saved_stack = alloca i8* ; <i8**> [#uses=2] - call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*)) - store i32 %X, i32* %X.addr - %0 = bitcast i32* %X.addr to { }* ; <{ }*> [#uses=1] - call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable to { }*)) - call void @llvm.dbg.region.start({ }* bitcast (%llvm.dbg.block.type* @llvm.dbg.block to { }*)) - call void @llvm.dbg.stoppoint(i32 4, i32 3, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) - %1 = call i8* @llvm.stacksave() ; <i8*> [#uses=1] - store i8* %1, 
i8** %saved_stack - %tmp = load i32* %X.addr ; <i32> [#uses=1] - %2 = mul i32 4, %tmp ; <i32> [#uses=1] - %vla = alloca i8, i32 %2 ; <i8*> [#uses=1] - %tmp1 = bitcast i8* %vla to i32* ; <i32*> [#uses=1] - %3 = bitcast i32* %tmp1 to { }* ; <{ }*> [#uses=1] - call void @llvm.dbg.declare({ }* %3, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable7 to { }*)) - call void @llvm.dbg.stoppoint(i32 5, i32 1, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) - call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.block.type* @llvm.dbg.block to { }*)) - br label %cleanup + %s1_addr = alloca i8* ; <i8**> [#uses=2] + %retval = alloca i32 ; <i32*> [#uses=2] + %saved_stack.1 = alloca i8* ; <i8**> [#uses=2] + %0 = alloca i32 ; <i32*> [#uses=2] + %str.0 = alloca [0 x i8]* ; <[0 x i8]**> [#uses=3] + %1 = alloca i64 ; <i64*> [#uses=2] + %2 = alloca i64 ; <i64*> [#uses=1] + %3 = alloca i64 ; <i64*> [#uses=6] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.declare(metadata !{i8** %s1_addr}, metadata !0), !dbg !7 + store i8* %s1, i8** %s1_addr + call void @llvm.dbg.declare(metadata !{[0 x i8]** %str.0}, metadata !8), !dbg !7 + %4 = call i8* @llvm.stacksave(), !dbg !7 ; <i8*> [#uses=1] + store i8* %4, i8** %saved_stack.1, align 8, !dbg !7 + %5 = load i8** %s1_addr, align 8, !dbg !13 ; <i8*> [#uses=1] + %6 = call i64 @strlen(i8* %5) nounwind readonly, !dbg !13 ; <i64> [#uses=1] + %7 = add i64 %6, 1, !dbg !13 ; <i64> [#uses=1] + store i64 %7, i64* %3, align 8, !dbg !13 + %8 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1] + %9 = sub nsw i64 %8, 1, !dbg !13 ; <i64> [#uses=0] + %10 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1] + %11 = mul i64 %10, 8, !dbg !13 ; <i64> [#uses=0] + %12 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1] + store i64 %12, i64* %2, align 8, !dbg !13 + %13 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1] + %14 = mul i64 %13, 8, !dbg !13 ; <i64> [#uses=0] + %15 = load i64* %3, align 8, 
!dbg !13 ; <i64> [#uses=1] + store i64 %15, i64* %1, align 8, !dbg !13 + %16 = load i64* %1, align 8, !dbg !13 ; <i64> [#uses=1] + %17 = trunc i64 %16 to i32, !dbg !13 ; <i32> [#uses=1] + %18 = alloca i8, i32 %17, !dbg !13 ; <i8*> [#uses=1] + %19 = bitcast i8* %18 to [0 x i8]*, !dbg !13 ; <[0 x i8]*> [#uses=1] + store [0 x i8]* %19, [0 x i8]** %str.0, align 8, !dbg !13 + %20 = load [0 x i8]** %str.0, align 8, !dbg !15 ; <[0 x i8]*> [#uses=1] + %21 = getelementptr inbounds [0 x i8]* %20, i64 0, i64 0, !dbg !15 ; <i8*> [#uses=1] + store i8 0, i8* %21, align 1, !dbg !15 + %22 = load [0 x i8]** %str.0, align 8, !dbg !16 ; <[0 x i8]*> [#uses=1] + %23 = getelementptr inbounds [0 x i8]* %22, i64 0, i64 0, !dbg !16 ; <i8*> [#uses=1] + %24 = load i8* %23, align 1, !dbg !16 ; <i8> [#uses=1] + %25 = sext i8 %24 to i32, !dbg !16 ; <i32> [#uses=1] + store i32 %25, i32* %0, align 4, !dbg !16 + %26 = load i8** %saved_stack.1, align 8, !dbg !16 ; <i8*> [#uses=1] + call void @llvm.stackrestore(i8* %26), !dbg !16 + %27 = load i32* %0, align 4, !dbg !16 ; <i32> [#uses=1] + store i32 %27, i32* %retval, align 4, !dbg !16 + br label %return, !dbg !16 -cleanup: ; preds = %entry - %tmp2 = load i8** %saved_stack ; <i8*> [#uses=1] - call void @llvm.stackrestore(i8* %tmp2) - call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*)) - %4 = load i32* %retval ; <i32> [#uses=1] - ret i32 %4 +return: ; preds = %entry + %retval1 = load i32* %retval, !dbg !16 ; <i32> [#uses=1] + %retval12 = trunc i32 %retval1 to i8, !dbg !16 ; <i8> [#uses=1] + ret i8 %retval12, !dbg !16 } -declare void @llvm.dbg.func.start({ }*) nounwind - -declare void @llvm.dbg.declare({ }*, { }*) nounwind - -declare void @llvm.dbg.region.start({ }*) nounwind - -declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone declare i8* @llvm.stacksave() nounwind +declare i64 @strlen(i8*) nounwind readonly + declare void 
@llvm.stackrestore(i8*) nounwind -declare void @llvm.dbg.region.end({ }*) nounwind +!0 = metadata !{i32 459009, metadata !1, metadata !"s1", metadata !2, i32 2, metadata !6} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 458798, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 458769, i32 0, i32 1, metadata !"vla.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 458773, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5, metadata !6} +!5 = metadata !{i32 458788, metadata !2, metadata !"char", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 458767, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 2, i32 0, metadata !1, null} +!8 = metadata !{i32 459008, metadata !1, metadata !"str.0", metadata !2, i32 3, metadata !9} ; [ DW_TAG_auto_variable ] +!9 = metadata !{i32 458767, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ] +!10 = metadata !{i32 458753, metadata !2, metadata !"", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !5, metadata !11, i32 0, null} ; [ DW_TAG_array_type ] +!11 = metadata !{metadata !12} +!12 = metadata !{i32 458785, i64 0, i64 0} ; [ DW_TAG_subrange_type ] +!13 = metadata !{i32 3, i32 0, metadata !14, null} +!14 = metadata !{i32 458763, metadata !1, i32 0, i32 0} ; [ DW_TAG_lexical_block ] +!15 = metadata !{i32 4, i32 0, metadata !14, null} +!16 = metadata !{i32 5, i32 0, metadata !14, null} diff --git a/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll 
b/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll deleted file mode 100644 index e21c8923df..0000000000 --- a/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll +++ /dev/null @@ -1,102 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s - -; CHECK: _foo: -; CHECK: pavgw LCPI1_4(%rip) - -; rdar://7057804 - -define void @foo(i16* %out8x8, i16* %in8x8, i32 %lastrow) optsize ssp { -entry: - %0 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2] - %1 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %0, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %2 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - %3 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %2, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i10 = add <8 x i16> %0, %3 ; <<8 x i16>> [#uses=1] - %4 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> zeroinitializer, <8 x i16> %1) nounwind readnone ; <<8 x i16>> [#uses=1] - %5 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i10, <8 x i16> %4) nounwind readnone ; <<8 x i16>> [#uses=3] - %6 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %7 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2] - %8 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %7, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %9 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - 
%10 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %9, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i8 = add <8 x i16> %7, %10 ; <<8 x i16>> [#uses=1] - %11 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %8) nounwind readnone ; <<8 x i16>> [#uses=1] - %12 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i8, <8 x i16> %11) nounwind readnone ; <<8 x i16>> [#uses=1] - %13 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %14 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %15 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %16 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %6, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %17 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %12, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %18 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %13, <8 x i16> %15) nounwind readnone ; <<8 x i16>> [#uses=1] - %19 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %14) nounwind readnone ; <<8 x i16>> [#uses=2] - %20 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=4] - %21 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %17) nounwind readnone ; <<8 x i16>> [#uses=1] - %22 = bitcast <8 x i16> %21 to <2 x i64> ; <<2 x i64>> [#uses=1] - %23 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2] - %24 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %23, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %25 = call <8 
x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - %26 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %25, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i6 = add <8 x i16> %23, %26 ; <<8 x i16>> [#uses=1] - %27 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %24) nounwind readnone ; <<8 x i16>> [#uses=1] - %28 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i6, <8 x i16> %27) nounwind readnone ; <<8 x i16>> [#uses=1] - %29 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2] - %30 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %29, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %31 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - %32 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %31, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i4 = add <8 x i16> %29, %32 ; <<8 x i16>> [#uses=1] - %33 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %30) nounwind readnone ; <<8 x i16>> [#uses=1] - %34 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i4, <8 x i16> %33) nounwind readnone ; <<8 x i16>> [#uses=1] - %35 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>, <8 x i16> %20) nounwind readnone ; <<8 x i16>> [#uses=2] - %tmp.i2.i1 = mul <8 x i16> %20, <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170> ; <<8 x i16>> [#uses=1] - %36 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %35, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 
16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %37 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i1, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - %38 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %37, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i2 = add <8 x i16> %35, %38 ; <<8 x i16>> [#uses=1] - %39 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %36) nounwind readnone ; <<8 x i16>> [#uses=1] - %40 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i2, <8 x i16> %39) nounwind readnone ; <<8 x i16>> [#uses=1] - %41 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>, <8 x i16> %20) nounwind readnone ; <<8 x i16>> [#uses=2] - %tmp.i2.i = mul <8 x i16> %20, <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170> ; <<8 x i16>> [#uses=1] - %42 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %41, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %43 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - %44 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %43, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i = add <8 x i16> %41, %44 ; <<8 x i16>> [#uses=1] - %45 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %42) nounwind readnone ; <<8 x i16>> [#uses=1] - %46 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i, <8 x i16> %45) nounwind readnone ; <<8 x i16>> [#uses=1] - %47 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %18, <8 x i16> %16) nounwind readnone ; <<8 x i16>> [#uses=1] - %48 = bitcast <8 x i16> %47 to <2 x i64> ; <<2 x i64>> [#uses=1] - %49 = bitcast <8 x i16> %28 to <2 x i64> ; <<2 x i64>> [#uses=1] - %50 = 
getelementptr i16* %out8x8, i64 8 ; <i16*> [#uses=1] - %51 = bitcast i16* %50 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %49, <2 x i64>* %51, align 16 - %52 = bitcast <8 x i16> %40 to <2 x i64> ; <<2 x i64>> [#uses=1] - %53 = getelementptr i16* %out8x8, i64 16 ; <i16*> [#uses=1] - %54 = bitcast i16* %53 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %52, <2 x i64>* %54, align 16 - %55 = getelementptr i16* %out8x8, i64 24 ; <i16*> [#uses=1] - %56 = bitcast i16* %55 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %48, <2 x i64>* %56, align 16 - %57 = bitcast <8 x i16> %46 to <2 x i64> ; <<2 x i64>> [#uses=1] - %58 = getelementptr i16* %out8x8, i64 40 ; <i16*> [#uses=1] - %59 = bitcast i16* %58 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %57, <2 x i64>* %59, align 16 - %60 = bitcast <8 x i16> %34 to <2 x i64> ; <<2 x i64>> [#uses=1] - %61 = getelementptr i16* %out8x8, i64 48 ; <i16*> [#uses=1] - %62 = bitcast i16* %61 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %60, <2 x i64>* %62, align 16 - %63 = getelementptr i16* %out8x8, i64 56 ; <i16*> [#uses=1] - %64 = bitcast i16* %63 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %22, <2 x i64>* %64, align 16 - ret void -} - -declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone - -declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone - -declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone - -declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone - -declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone - -declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone diff --git a/test/CodeGen/X86/2009-09-07-CoalescerBug.ll b/test/CodeGen/X86/2009-09-07-CoalescerBug.ll index a5b4a79401..41b4bc0872 100644 --- a/test/CodeGen/X86/2009-09-07-CoalescerBug.ll +++ b/test/CodeGen/X86/2009-09-07-CoalescerBug.ll @@ -8,8 +8,7 
@@ define i64 @hammer_time(i64 %modulep, i64 %physfree) nounwind ssp noredzone noimplicitfloat { ; CHECK: hammer_time: ; CHECK: movq $Xrsvd, %rax -; CHECK: movq $Xrsvd, %rsi -; CHECK: movq $Xrsvd, %rdi +; CHECK: movq $Xrsvd, %rcx entry: br i1 undef, label %if.then, label %if.end diff --git a/test/CodeGen/X86/2010-02-11-NonTemporal.ll b/test/CodeGen/X86/2010-02-11-NonTemporal.ll new file mode 100644 index 0000000000..5789a0b984 --- /dev/null +++ b/test/CodeGen/X86/2010-02-11-NonTemporal.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; CHECK: movnt +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +!0 = metadata !{ i32 1 } + +define void @sub_(i32* noalias %n) { +"file movnt.f90, line 2, bb1": + %n1 = alloca i32*, align 8 + %i = alloca i32, align 4 + %"$LCS_0" = alloca i64, align 8 + %"$LCS_S2" = alloca <2 x double>, align 16 + %r9 = load <2 x double>* %"$LCS_S2", align 8 + %r10 = load i64* %"$LCS_0", align 8 + %r11 = inttoptr i64 %r10 to <2 x double>* + store <2 x double> %r9, <2 x double>* %r11, align 16, !nontemporal !0 + br label %"file movnt.f90, line 18, bb5" + +"file movnt.f90, line 18, bb5": + ret void +} diff --git a/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll b/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll new file mode 100644 index 0000000000..c5d3d16f81 --- /dev/null +++ b/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll @@ -0,0 +1,260 @@ +; RUN: llc < %s > %t +; PR6283 + +; Tricky coalescer bug: +; After coalescing %RAX with a virtual register, this instruction was rematted: +; +; %EAX<def> = MOV32rr %reg1070<kill> +; +; This instruction silently defined %RAX, and when rematting removed the +; instruction, the live interval for %RAX was not properly updated. The valno +; referred to a deleted instruction and bad things happened. 
+; +; The fix is to implicitly define %RAX when coalescing: +; +; %EAX<def> = MOV32rr %reg1070<kill>, %RAX<imp-def> +; + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +module asm "\09.ident\09\22GCC: (GNU) 4.5.0 20100212 (experimental) LLVM: 95975\22" + +%0 = type { %"union gimple_statement_d"* } +%"BITMAP_WORD[]" = type [2 x i64] +%"char[]" = type [4 x i8] +%"enum dom_state[]" = type [2 x i32] +%"int[]" = type [4 x i32] +%"struct VEC_basic_block_base" = type { i32, i32, [1 x %"struct basic_block_def"*] } +%"struct VEC_basic_block_gc" = type { %"struct VEC_basic_block_base" } +%"struct VEC_edge_base" = type { i32, i32, [1 x %"struct edge_def"*] } +%"struct VEC_edge_gc" = type { %"struct VEC_edge_base" } +%"struct VEC_gimple_base" = type { i32, i32, [1 x %"union gimple_statement_d"*] } +%"struct VEC_gimple_gc" = type { %"struct VEC_gimple_base" } +%"struct VEC_iv_cand_p_base" = type { i32, i32, [1 x %"struct iv_cand"*] } +%"struct VEC_iv_cand_p_heap" = type { %"struct VEC_iv_cand_p_base" } +%"struct VEC_iv_use_p_base" = type { i32, i32, [1 x %"struct iv_use"*] } +%"struct VEC_iv_use_p_heap" = type { %"struct VEC_iv_use_p_base" } +%"struct VEC_loop_p_base" = type { i32, i32, [1 x %"struct loop"*] } +%"struct VEC_loop_p_gc" = type { %"struct VEC_loop_p_base" } +%"struct VEC_rtx_base" = type { i32, i32, [1 x %"struct rtx_def"*] } +%"struct VEC_rtx_gc" = type { %"struct VEC_rtx_base" } +%"struct VEC_tree_base" = type { i32, i32, [1 x %"union tree_node"*] } +%"struct VEC_tree_gc" = type { %"struct VEC_tree_base" } +%"struct _obstack_chunk" = type { i8*, %"struct _obstack_chunk"*, %"char[]" } +%"struct basic_block_def" = type { %"struct VEC_edge_gc"*, %"struct VEC_edge_gc"*, i8*, %"struct loop"*, [2 x %"struct et_node"*], %"struct basic_block_def"*, %"struct basic_block_def"*, %"union basic_block_il_dependent", 
i64, i32, i32, i32, i32, i32 } +%"struct bitmap_element" = type { %"struct bitmap_element"*, %"struct bitmap_element"*, i32, %"BITMAP_WORD[]" } +%"struct bitmap_head_def" = type { %"struct bitmap_element"*, %"struct bitmap_element"*, i32, %"struct bitmap_obstack"* } +%"struct bitmap_obstack" = type { %"struct bitmap_element"*, %"struct bitmap_head_def"*, %"struct obstack" } +%"struct block_symbol" = type { [3 x %"union rtunion"], %"struct object_block"*, i64 } +%"struct comp_cost" = type { i32, i32 } +%"struct control_flow_graph" = type { %"struct basic_block_def"*, %"struct basic_block_def"*, %"struct VEC_basic_block_gc"*, i32, i32, i32, %"struct VEC_basic_block_gc"*, i32, %"enum dom_state[]", %"enum dom_state[]", i32, i32 } +%"struct cost_pair" = type { %"struct iv_cand"*, %"struct comp_cost", %"struct bitmap_head_def"*, %"union tree_node"* } +%"struct def_optype_d" = type { %"struct def_optype_d"*, %"union tree_node"** } +%"struct double_int" = type { i64, i64 } +%"struct edge_def" = type { %"struct basic_block_def"*, %"struct basic_block_def"*, %"union edge_def_insns", i8*, %"union tree_node"*, i32, i32, i32, i32, i64 } +%"struct eh_status" = type opaque +%"struct et_node" = type opaque +%"struct function" = type { %"struct eh_status"*, %"struct control_flow_graph"*, %"struct gimple_seq_d"*, %"struct gimple_df"*, %"struct loops"*, %"struct htab"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"struct machine_function"*, %"struct language_function"*, %"struct htab"*, i32, i32, i32, i32, i32, i32, i8*, i8, i8, i8, i8 } +%"struct gimple_bb_info" = type { %"struct gimple_seq_d"*, %"struct gimple_seq_d"* } +%"struct gimple_df" = type { %"struct htab"*, %"struct VEC_gimple_gc"*, %"struct VEC_tree_gc"*, %"union tree_node"*, %"struct pt_solution", %"struct pt_solution", %"struct pointer_map_t"*, %"union tree_node"*, %"struct htab"*, %"struct bitmap_head_def"*, i8, %"struct ssa_operands" } +%"struct gimple_seq_d" = type { %"struct 
gimple_seq_node_d"*, %"struct gimple_seq_node_d"*, %"struct gimple_seq_d"* } +%"struct gimple_seq_node_d" = type { %"union gimple_statement_d"*, %"struct gimple_seq_node_d"*, %"struct gimple_seq_node_d"* } +%"struct gimple_statement_base" = type { i8, i8, i16, i32, i32, i32, %"struct basic_block_def"*, %"union tree_node"* } +%"struct gimple_statement_phi" = type { %"struct gimple_statement_base", i32, i32, %"union tree_node"*, %"struct phi_arg_d[]" } +%"struct htab" = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i64, i64, i64, i32, i32, i8* (i64, i64)*, void (i8*)*, i8*, i8* (i8*, i64, i64)*, void (i8*, i8*)*, i32 } +%"struct iv" = type { %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, i8, i8, i32 } +%"struct iv_cand" = type { i32, i8, i32, %"union gimple_statement_d"*, %"union tree_node"*, %"union tree_node"*, %"struct iv"*, i32, i32, %"struct iv_use"*, %"struct bitmap_head_def"* } +%"struct iv_use" = type { i32, i32, %"struct iv"*, %"union gimple_statement_d"*, %"union tree_node"**, %"struct bitmap_head_def"*, i32, %"struct cost_pair"*, %"struct iv_cand"* } +%"struct ivopts_data" = type { %"struct loop"*, %"struct pointer_map_t"*, i32, i32, %"struct version_info"*, %"struct bitmap_head_def"*, %"struct VEC_iv_use_p_heap"*, %"struct VEC_iv_cand_p_heap"*, %"struct bitmap_head_def"*, i32, i8, i8 } +%"struct lang_decl" = type opaque +%"struct language_function" = type opaque +%"struct loop" = type { i32, i32, %"struct basic_block_def"*, %"struct basic_block_def"*, %"struct comp_cost", i32, i32, %"struct VEC_loop_p_gc"*, %"struct loop"*, %"struct loop"*, i8*, %"union tree_node"*, %"struct double_int", %"struct double_int", i8, i8, i32, %"struct nb_iter_bound"*, %"struct loop_exit"*, i8, %"union tree_node"* } +%"struct loop_exit" = type { %"struct edge_def"*, %"struct loop_exit"*, %"struct loop_exit"*, %"struct loop_exit"* } +%"struct loops" = type { i32, %"struct VEC_loop_p_gc"*, %"struct htab"*, %"struct loop"* } +%"struct 
machine_cfa_state" = type { %"struct rtx_def"*, i64 } +%"struct machine_function" = type { %"struct stack_local_entry"*, i8*, i32, i32, %"int[]", i32, %"struct machine_cfa_state", i32, i8 } +%"struct nb_iter_bound" = type { %"union gimple_statement_d"*, %"struct double_int", i8, %"struct nb_iter_bound"* } +%"struct object_block" = type { %"union section"*, i32, i64, %"struct VEC_rtx_gc"*, %"struct VEC_rtx_gc"* } +%"struct obstack" = type { i64, %"struct _obstack_chunk"*, i8*, i8*, i8*, i64, i32, %"struct _obstack_chunk"* (i8*, i64)*, void (i8*, %"struct _obstack_chunk"*)*, i8*, i8 } +%"struct phi_arg_d" = type { %"struct ssa_use_operand_d", %"union tree_node"*, i32 } +%"struct phi_arg_d[]" = type [1 x %"struct phi_arg_d"] +%"struct pointer_map_t" = type opaque +%"struct pt_solution" = type { i8, %"struct bitmap_head_def"* } +%"struct rtx_def" = type { i16, i8, i8, %"union u" } +%"struct section_common" = type { i32 } +%"struct ssa_operand_memory_d" = type { %"struct ssa_operand_memory_d"*, %"uchar[]" } +%"struct ssa_operands" = type { %"struct ssa_operand_memory_d"*, i32, i32, i8, %"struct def_optype_d"*, %"struct use_optype_d"* } +%"struct ssa_use_operand_d" = type { %"struct ssa_use_operand_d"*, %"struct ssa_use_operand_d"*, %0, %"union tree_node"** } +%"struct stack_local_entry" = type opaque +%"struct tree_base" = type <{ i16, i8, i8, i8, [2 x i8], i8 }> +%"struct tree_common" = type { %"struct tree_base", %"union tree_node"*, %"union tree_node"* } +%"struct tree_decl_common" = type { %"struct tree_decl_minimal", %"union tree_node"*, i8, i8, i8, i8, i8, i32, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"struct lang_decl"* } +%"struct tree_decl_minimal" = type { %"struct tree_common", i32, i32, %"union tree_node"*, %"union tree_node"* } +%"struct tree_decl_non_common" = type { %"struct tree_decl_with_vis", %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"* } +%"struct tree_decl_with_rtl" = 
type { %"struct tree_decl_common", %"struct rtx_def"* } +%"struct tree_decl_with_vis" = type { %"struct tree_decl_with_rtl", %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, i8, i8, i8 } +%"struct tree_function_decl" = type { %"struct tree_decl_non_common", %"struct function"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, i16, i8, i8 } +%"struct unnamed_section" = type { %"struct section_common", void (i8*)*, i8*, %"union section"* } +%"struct use_optype_d" = type { %"struct use_optype_d"*, %"struct ssa_use_operand_d" } +%"struct version_info" = type { %"union tree_node"*, %"struct iv"*, i8, i32, i8 } +%"uchar[]" = type [1 x i8] +%"union basic_block_il_dependent" = type { %"struct gimple_bb_info"* } +%"union edge_def_insns" = type { %"struct gimple_seq_d"* } +%"union gimple_statement_d" = type { %"struct gimple_statement_phi" } +%"union rtunion" = type { i8* } +%"union section" = type { %"struct unnamed_section" } +%"union tree_node" = type { %"struct tree_function_decl" } +%"union u" = type { %"struct block_symbol" } + +declare fastcc %"union tree_node"* @get_computation_at(%"struct loop"*, %"struct iv_use"* nocapture, %"struct iv_cand"* nocapture, %"union gimple_statement_d"*) nounwind + +declare fastcc i32 @computation_cost(%"union tree_node"*, i8 zeroext) nounwind + +define fastcc i64 @get_computation_cost_at(%"struct ivopts_data"* %data, %"struct iv_use"* nocapture %use, %"struct iv_cand"* nocapture %cand, i8 zeroext %address_p, %"struct bitmap_head_def"** %depends_on, %"union gimple_statement_d"* %at, i8* %can_autoinc) nounwind { +entry: + br i1 undef, label %"100", label %"4" + +"4": ; preds = %entry + br i1 undef, label %"6", label %"5" + +"5": ; preds = %"4" + unreachable + +"6": ; preds = %"4" + br i1 undef, label %"8", label %"7" + +"7": ; preds = %"6" + unreachable + +"8": ; preds = %"6" + br i1 undef, label %"100", label %"10" + +"10": ; preds = %"8" + br i1 undef, label %"17", label %"16" + +"16": ; preds = %"10" + 
unreachable + +"17": ; preds = %"10" + br i1 undef, label %"19", label %"18" + +"18": ; preds = %"17" + unreachable + +"19": ; preds = %"17" + br i1 undef, label %"93", label %"20" + +"20": ; preds = %"19" + br i1 undef, label %"23", label %"21" + +"21": ; preds = %"20" + unreachable + +"23": ; preds = %"20" + br i1 undef, label %"100", label %"25" + +"25": ; preds = %"23" + br i1 undef, label %"100", label %"26" + +"26": ; preds = %"25" + br i1 undef, label %"30", label %"28" + +"28": ; preds = %"26" + unreachable + +"30": ; preds = %"26" + br i1 undef, label %"59", label %"51" + +"51": ; preds = %"30" + br i1 undef, label %"55", label %"52" + +"52": ; preds = %"51" + unreachable + +"55": ; preds = %"51" + %0 = icmp ugt i32 0, undef ; <i1> [#uses=1] + br i1 %0, label %"50.i", label %"9.i" + +"9.i": ; preds = %"55" + unreachable + +"50.i": ; preds = %"55" + br i1 undef, label %"55.i", label %"54.i" + +"54.i": ; preds = %"50.i" + br i1 undef, label %"57.i", label %"55.i" + +"55.i": ; preds = %"54.i", %"50.i" + unreachable + +"57.i": ; preds = %"54.i" + br label %"63.i" + +"61.i": ; preds = %"63.i" + br i1 undef, label %"64.i", label %"62.i" + +"62.i": ; preds = %"61.i" + br label %"63.i" + +"63.i": ; preds = %"62.i", %"57.i" + br i1 undef, label %"61.i", label %"64.i" + +"64.i": ; preds = %"63.i", %"61.i" + unreachable + +"59": ; preds = %"30" + br i1 undef, label %"60", label %"82" + +"60": ; preds = %"59" + br i1 undef, label %"61", label %"82" + +"61": ; preds = %"60" + br i1 undef, label %"62", label %"82" + +"62": ; preds = %"61" + br i1 undef, label %"100", label %"63" + +"63": ; preds = %"62" + br i1 undef, label %"65", label %"64" + +"64": ; preds = %"63" + unreachable + +"65": ; preds = %"63" + br i1 undef, label %"66", label %"67" + +"66": ; preds = %"65" + unreachable + +"67": ; preds = %"65" + %1 = load i32* undef, align 4 ; <i32> [#uses=0] + br label %"100" + +"82": ; preds = %"61", %"60", %"59" + unreachable + +"93": ; preds = %"19" + %2 = call fastcc 
%"union tree_node"* @get_computation_at(%"struct loop"* undef, %"struct iv_use"* %use, %"struct iv_cand"* %cand, %"union gimple_statement_d"* %at) nounwind ; <%"union tree_node"*> [#uses=1] + br i1 undef, label %"100", label %"97" + +"97": ; preds = %"93" + br i1 undef, label %"99", label %"98" + +"98": ; preds = %"97" + br label %"99" + +"99": ; preds = %"98", %"97" + %3 = phi %"union tree_node"* [ undef, %"98" ], [ %2, %"97" ] ; <%"union tree_node"*> [#uses=1] + %4 = call fastcc i32 @computation_cost(%"union tree_node"* %3, i8 zeroext undef) nounwind ; <i32> [#uses=1] + br label %"100" + +"100": ; preds = %"99", %"93", %"67", %"62", %"25", %"23", %"8", %entry + %memtmp1.1.0 = phi i32 [ 0, %"99" ], [ 10000000, %entry ], [ 10000000, %"8" ], [ 10000000, %"23" ], [ 10000000, %"25" ], [ undef, %"62" ], [ undef, %"67" ], [ 10000000, %"93" ] ; <i32> [#uses=1] + %memtmp1.0.0 = phi i32 [ %4, %"99" ], [ 10000000, %entry ], [ 10000000, %"8" ], [ 10000000, %"23" ], [ 10000000, %"25" ], [ undef, %"62" ], [ undef, %"67" ], [ 10000000, %"93" ] ; <i32> [#uses=1] + %5 = zext i32 %memtmp1.0.0 to i64 ; <i64> [#uses=1] + %6 = zext i32 %memtmp1.1.0 to i64 ; <i64> [#uses=1] + %7 = shl i64 %6, 32 ; <i64> [#uses=1] + %8 = or i64 %7, %5 ; <i64> [#uses=1] + ret i64 %8 +} diff --git a/test/CodeGen/X86/2010-02-15-ImplicitDefBug.ll b/test/CodeGen/X86/2010-02-15-ImplicitDefBug.ll new file mode 100644 index 0000000000..c429172852 --- /dev/null +++ b/test/CodeGen/X86/2010-02-15-ImplicitDefBug.ll @@ -0,0 +1,80 @@ +; RUN: llc < %s > %t +; PR6300 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +; When the "154" loops back onto itself, it defines a register after using it. +; The first value of the register is implicit-def. 
+ +%"struct location_chain_def" = type { %"struct location_chain_def"*, %"struct rtx_def"*, %"struct rtx_def"*, i32 } +%"struct real_value" = type { i32, [5 x i32] } +%"struct rtx_def" = type { i16, i8, i8, %"union u" } +%"union u" = type { %"struct real_value" } + +define i32 @variable_union(i8** nocapture %slot, i8* nocapture %data) nounwind { +entry: + br i1 undef, label %"4.thread", label %"3" + +"4.thread": ; preds = %entry + unreachable + +"3": ; preds = %entry + br i1 undef, label %"19", label %"20" + +"19": ; preds = %"3" + unreachable + +"20": ; preds = %"3" + br i1 undef, label %"56.preheader", label %dv_onepart_p.exit + +dv_onepart_p.exit: ; preds = %"20" + unreachable + +"56.preheader": ; preds = %"20" + br label %"56" + +"50": ; preds = %"57" + br label %"56" + +"56": ; preds = %"50", %"56.preheader" + br i1 undef, label %"57", label %"58" + +"57": ; preds = %"56" + br i1 undef, label %"50", label %"58" + +"58": ; preds = %"57", %"56" + br i1 undef, label %"62", label %"63" + +"62": ; preds = %"58" + unreachable + +"63": ; preds = %"58" + br i1 undef, label %"67", label %"66" + +"66": ; preds = %"63" + br label %"67" + +"67": ; preds = %"66", %"63" + br label %"68" + +"68": ; preds = %"161", %"67" + br i1 undef, label %"153", label %"161" + +"153": ; preds = %"68" + br i1 undef, label %"160", label %bb.nph46 + +bb.nph46: ; preds = %"153" + br label %"154" + +"154": ; preds = %"154", %bb.nph46 + %0 = phi %"struct location_chain_def"** [ undef, %bb.nph46 ], [ %1, %"154" ] ; <%"struct location_chain_def"**> [#uses=1] + %1 = bitcast i8* undef to %"struct location_chain_def"** ; <%"struct location_chain_def"**> [#uses=1] + store %"struct location_chain_def"* undef, %"struct location_chain_def"** %0, align 4 + br i1 undef, label %"160", label %"154" + +"160": ; preds = %"154", %"153" + br label %"161" + +"161": ; preds = %"160", %"68" + br label %"68" +} diff --git a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll 
b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll new file mode 100644 index 0000000000..eb21dc234a --- /dev/null +++ b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll @@ -0,0 +1,55 @@ +; RUN: llc -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s +; Check that lowered arguments do not overwrite the return address before it is moved. +; Bug 6225 +; +; If a call is a fastcc tail call and tail call optimization is enabled, the +; caller frame is replaced by the callee frame. This can require that arguments are +; placed on the former return address stack slot. Special care needs to be taken +; that the return address is moved / or stored in a register before +; lowering of arguments potentially overwrites the value. +; +; Move return address (76(%esp)) to a temporary register (%ebp) +; CHECK: movl 76(%esp), %ebp +; Overwrite return address +; CHECK: movl %ecx, 76(%esp) +; Move return address from temporary register (%ebp) to new stack location (60(%esp)) +; CHECK: movl %ebp, 60(%esp) + +%tupl_p = type [9 x i32]* + +declare fastcc void @l297(i32 %r10, i32 %r9, i32 %r8, i32 %r7, i32 %r6, i32 %r5, i32 %r3, i32 %r2) noreturn nounwind +declare fastcc void @l298(i32 %r10, i32 %r9, i32 %r4) noreturn nounwind + +define fastcc void @l186(%tupl_p %r1) noreturn nounwind { +entry: + %ptr1 = getelementptr %tupl_p %r1, i32 0, i32 0 + %r2 = load i32* %ptr1 + %ptr3 = getelementptr %tupl_p %r1, i32 0, i32 1 + %r3 = load i32* %ptr3 + %ptr5 = getelementptr %tupl_p %r1, i32 0, i32 2 + %r4 = load i32* %ptr5 + %ptr7 = getelementptr %tupl_p %r1, i32 0, i32 3 + %r5 = load i32* %ptr7 + %ptr9 = getelementptr %tupl_p %r1, i32 0, i32 4 + %r6 = load i32* %ptr9 + %ptr11 = getelementptr %tupl_p %r1, i32 0, i32 5 + %r7 = load i32* %ptr11 + %ptr13 = getelementptr %tupl_p %r1, i32 0, i32 6 + %r8 = load i32* %ptr13 + %ptr15 = getelementptr %tupl_p %r1, i32 0, i32 7 + %r9 = load i32* %ptr15 + %ptr17 = getelementptr %tupl_p %r1, i32 0, i32 8 + %r10 = load i32* %ptr17 + %cond = icmp eq
i32 %r10, 3 + br i1 %cond, label %true, label %false + +true: + tail call fastcc void @l297(i32 %r10, i32 %r9, i32 %r8, i32 %r7, i32 %r6, i32 %r5, i32 %r3, i32 %r2) noreturn nounwind + ret void + +false: + tail call fastcc void @l298(i32 %r10, i32 %r9, i32 %r4) noreturn nounwind + ret void +} + + diff --git a/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll b/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll new file mode 100644 index 0000000000..6a58e9e551 --- /dev/null +++ b/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +define i32* @t() nounwind optsize ssp { +entry: +; CHECK: t: +; CHECK: testl %eax, %eax +; CHECK: js + %cmp = icmp slt i32 undef, 0 ; <i1> [#uses=1] + %outsearch.0 = select i1 %cmp, i1 false, i1 true ; <i1> [#uses=1] + br i1 %outsearch.0, label %if.then27, label %if.else29 + +if.then27: ; preds = %entry + ret i32* undef + +if.else29: ; preds = %entry + unreachable +} + diff --git a/test/CodeGen/X86/2010-02-23-DIV8rDefinesAX.ll b/test/CodeGen/X86/2010-02-23-DIV8rDefinesAX.ll new file mode 100644 index 0000000000..8543c80117 --- /dev/null +++ b/test/CodeGen/X86/2010-02-23-DIV8rDefinesAX.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s +; PR6374 +; +; This test produces a DIV8r instruction and uses %AX instead of %AH and %AL. +; The DIV8r must have the right imp-defs for that to work. 
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +%struct._i386_state = type { %union.anon } +%union.anon = type { [0 x i8] } + +define void @i386_aam(%struct._i386_state* nocapture %cpustate) nounwind ssp { +entry: + %call = tail call fastcc signext i8 @FETCH() ; <i8> [#uses=1] + %rem = urem i8 0, %call ; <i8> [#uses=1] + store i8 %rem, i8* undef + ret void +} + +declare fastcc signext i8 @FETCH() nounwind readnone ssp diff --git a/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll b/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll new file mode 100644 index 0000000000..4a26ba088e --- /dev/null +++ b/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll @@ -0,0 +1,49 @@ +; RUN: llc < %s +; PR6372 +; +; This test produces a move instruction with an implicitly defined super-register: +; +; %DL<def> = MOV8rr %reg1038<kill>, %RDX<imp-def> +; +; When %DL is rematerialized, we must remember to update live intervals for +; sub-registers %DX and %EDX. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define noalias i8* @foo() nounwind ssp { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %if.end40, %entry + %tmp6 = load i8* undef, align 2 ; <i8> [#uses=3] + %conv11 = sext i8 %tmp6 to i64 ; <i64> [#uses=1] + %cmp15 = icmp slt i64 %conv11, undef ; <i1> [#uses=1] + br i1 %cmp15, label %if.end, label %if.then + +if.then: ; preds = %for.body + %conv18 = sext i8 %tmp6 to i32 ; <i32> [#uses=1] + %call = tail call i32 (...)* @invalid(i32 0, i32 0, i32 %conv18) nounwind ; <i32> [#uses=0] + br label %if.end + +if.end: ; preds = %if.then, %for.body + %index.0 = phi i8 [ 0, %if.then ], [ %tmp6, %for.body ] ; <i8> [#uses=1] + store i8 %index.0, i8* undef + %tmp24 = load i8* undef ; <i8> [#uses=2] + br i1 undef, label %if.end40, label %if.then36 + +if.then36: ; preds = %if.end + %conv38 = sext i8 %tmp24 to i32 ; <i32> [#uses=1] + %call39 = tail call i32 (...)* @invalid(i32 0, i32 0, i32 %conv38) nounwind ; <i32> [#uses=0] + br label %if.end40 + +if.end40: ; preds = %if.then36, %if.end + %index.1 = phi i8 [ 0, %if.then36 ], [ %tmp24, %if.end ] ; <i8> [#uses=1] + store i8 %index.1, i8* undef + br i1 false, label %for.body, label %for.end + +for.end: ; preds = %if.end40, %entry + ret i8* undef +} + +declare i32 @invalid(...) diff --git a/test/CodeGen/X86/2010-02-23-SingleDefPhiJoin.ll b/test/CodeGen/X86/2010-02-23-SingleDefPhiJoin.ll new file mode 100644 index 0000000000..aeed401461 --- /dev/null +++ b/test/CodeGen/X86/2010-02-23-SingleDefPhiJoin.ll @@ -0,0 +1,146 @@ +; RUN: llc < %s +; PR6363 +; +; This test case creates a phi join register with a single definition. The other +; predecessor blocks are implicit-def. +; +; If LiveIntervalAnalysis fails to recognize this as a phi join, the coalescer +; will detect an infinity valno loop. 
+; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @decode(i8* nocapture %input, i32 %offset, i8* nocapture %output) nounwind { +entry: + br i1 undef, label %meshBB86, label %meshBB102 + +bb: ; preds = %meshBB106, %meshBB102 + br i1 false, label %bb9, label %meshBB90 + +bb.nph: ; preds = %meshBB90 + br label %meshBB114 + +bb.nph.fragment: ; preds = %meshBB114 + br label %meshBB118 + +bb1.fragment: ; preds = %meshBB118 + br i1 false, label %bb2, label %bb3 + +bb2: ; preds = %bb1.fragment + br label %meshBB74 + +bb2.fragment15: ; preds = %meshBB74 + br label %meshBB98 + +bb3: ; preds = %bb1.fragment + br i1 undef, label %meshBB, label %meshBB102 + +bb4: ; preds = %meshBB + br label %meshBB118 + +bb4.fragment: ; preds = %meshBB118 + br label %meshBB82 + +bb5: ; preds = %meshBB102, %meshBB82 + br i1 false, label %bb6, label %bb7 + +bb6: ; preds = %bb5 + br label %bb7 + +bb7: ; preds = %meshBB98, %bb6, %bb5 + br label %meshBB114 + +bb7.fragment: ; preds = %meshBB114 + br i1 undef, label %meshBB74, label %bb9 + +bb9: ; preds = %bb7.fragment, %bb + br label %bb1.i23 + +bb1.i23: ; preds = %meshBB110, %bb9 + br i1 undef, label %meshBB106, label %meshBB110 + +skip_to_newline.exit26: ; preds = %meshBB106 + br label %meshBB86 + +skip_to_newline.exit26.fragment: ; preds = %meshBB86 + br i1 false, label %meshBB90, label %meshBB106 + +bb11.fragment: ; preds = %meshBB90, %meshBB86 + br label %meshBB122 + +bb1.i: ; preds = %meshBB122, %meshBB + %ooffset.2.lcssa.phi.SV.phi203 = phi i32 [ 0, %meshBB122 ], [ %ooffset.2.lcssa.phi.SV.phi233, %meshBB ] ; <i32> [#uses=1] + br label %meshBB98 + +bb1.i.fragment: ; preds = %meshBB98 + br i1 undef, label %meshBB78, label %meshBB + +skip_to_newline.exit: ; preds = %meshBB78 + br i1 undef, label %bb12, label %meshBB110 + +bb12: ; preds = %skip_to_newline.exit + br label %meshBB94 + 
+bb12.fragment: ; preds = %meshBB94 + br i1 false, label %bb13, label %meshBB78 + +bb13: ; preds = %bb12.fragment + br label %meshBB82 + +bb13.fragment: ; preds = %meshBB82 + br i1 undef, label %meshBB94, label %meshBB122 + +bb14: ; preds = %meshBB94 + ret i32 %ooffset.2.lcssa.phi.SV.phi250 + +bb15: ; preds = %meshBB122, %meshBB110, %meshBB78 + unreachable + +meshBB: ; preds = %bb1.i.fragment, %bb3 + %ooffset.2.lcssa.phi.SV.phi233 = phi i32 [ undef, %bb3 ], [ %ooffset.2.lcssa.phi.SV.phi209, %bb1.i.fragment ] ; <i32> [#uses=1] + br i1 undef, label %bb1.i, label %bb4 + +meshBB74: ; preds = %bb7.fragment, %bb2 + br i1 false, label %meshBB118, label %bb2.fragment15 + +meshBB78: ; preds = %bb12.fragment, %bb1.i.fragment + %ooffset.2.lcssa.phi.SV.phi239 = phi i32 [ %ooffset.2.lcssa.phi.SV.phi209, %bb1.i.fragment ], [ %ooffset.2.lcssa.phi.SV.phi250, %bb12.fragment ] ; <i32> [#uses=1] + br i1 false, label %bb15, label %skip_to_newline.exit + +meshBB82: ; preds = %bb13, %bb4.fragment + br i1 false, label %bb5, label %bb13.fragment + +meshBB86: ; preds = %skip_to_newline.exit26, %entry + br i1 undef, label %skip_to_newline.exit26.fragment, label %bb11.fragment + +meshBB90: ; preds = %skip_to_newline.exit26.fragment, %bb + br i1 false, label %bb11.fragment, label %bb.nph + +meshBB94: ; preds = %bb13.fragment, %bb12 + %ooffset.2.lcssa.phi.SV.phi250 = phi i32 [ 0, %bb13.fragment ], [ %ooffset.2.lcssa.phi.SV.phi239, %bb12 ] ; <i32> [#uses=2] + br i1 false, label %bb12.fragment, label %bb14 + +meshBB98: ; preds = %bb1.i, %bb2.fragment15 + %ooffset.2.lcssa.phi.SV.phi209 = phi i32 [ undef, %bb2.fragment15 ], [ %ooffset.2.lcssa.phi.SV.phi203, %bb1.i ] ; <i32> [#uses=2] + br i1 undef, label %bb1.i.fragment, label %bb7 + +meshBB102: ; preds = %bb3, %entry + br i1 undef, label %bb5, label %bb + +meshBB106: ; preds = %skip_to_newline.exit26.fragment, %bb1.i23 + br i1 undef, label %bb, label %skip_to_newline.exit26 + +meshBB110: ; preds = %skip_to_newline.exit, %bb1.i23 + br i1 false, 
label %bb15, label %bb1.i23 + +meshBB114: ; preds = %bb7, %bb.nph + %meshStackVariable115.phi = phi i32 [ 19, %bb7 ], [ 8, %bb.nph ] ; <i32> [#uses=0] + br i1 undef, label %bb.nph.fragment, label %bb7.fragment + +meshBB118: ; preds = %meshBB74, %bb4, %bb.nph.fragment + %meshCmp121 = icmp eq i32 undef, 10 ; <i1> [#uses=1] + br i1 %meshCmp121, label %bb4.fragment, label %bb1.fragment + +meshBB122: ; preds = %bb13.fragment, %bb11.fragment + br i1 false, label %bb1.i, label %bb15 +} diff --git a/test/CodeGen/X86/2010-03-04-Mul8Bug.ll b/test/CodeGen/X86/2010-03-04-Mul8Bug.ll new file mode 100644 index 0000000000..48e75e9572 --- /dev/null +++ b/test/CodeGen/X86/2010-03-04-Mul8Bug.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s +; PR6489 +; +; This test case produces a MUL8 instruction and then tries to read the result +; from the AX register instead of AH/AL. That confuses live interval analysis. +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define void @func_56(i64 %p_57, i32*** %p_58) nounwind ssp { +for.end: + %conv49 = trunc i32 undef to i8 ; <i8> [#uses=1] + %div.i = udiv i8 %conv49, 5 ; <i8> [#uses=1] + %conv51 = zext i8 %div.i to i32 ; <i32> [#uses=1] + %call55 = call i32 @qux(i32 undef, i32 -2) nounwind ; <i32> [#uses=1] + %rem.i = urem i32 %call55, -1 ; <i32> [#uses=1] + %cmp57 = icmp uge i32 %conv51, %rem.i ; <i1> [#uses=1] + %conv58 = zext i1 %cmp57 to i32 ; <i32> [#uses=1] + %call85 = call i32 @func_35(i32*** undef, i32 undef, i32 %conv58, i32 1247, i32 0) nounwind ; <i32> [#uses=0] + ret void +} + +declare i32 @func_35(i32***, i32, i32, i32, i32) + +declare i32 @qux(i32, i32) diff --git a/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll b/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll new file mode 100644 index 0000000000..5de19662ff --- /dev/null +++ b/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll @@ 
-0,0 +1,42 @@ +; RUN: llc < %s -verify-machineinstrs +; +; When BRCOND is constant-folded to BR, make sure that PHI nodes don't get +; spurious operands when the CFG is trimmed. +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.2" + +define fastcc void @_ZSt16__introsort_loopIPdl17less_than_functorEvT_S2_T0_T1_(double* %__first, double* %__last, i64 %__depth_limit) nounwind ssp { +entry: + br i1 undef, label %bb1, label %bb2 + +bb1: ; preds = %entry + ret void + +bb2: ; preds = %entry + br label %bb2.outer.i + +bb2.outer.i: ; preds = %bb9.i, %bb2 + br i1 undef, label %bb1.i, label %bb5.preheader.i + +bb1.i: ; preds = %bb1.i, %bb2.outer.i + %indvar5.i = phi i64 [ %tmp, %bb1.i ], [ 0, %bb2.outer.i ] ; <i64> [#uses=1] + %tmp = add i64 %indvar5.i, 1 ; <i64> [#uses=2] + %scevgep.i = getelementptr double* undef, i64 %tmp ; <double*> [#uses=0] + br i1 undef, label %bb1.i, label %bb5.preheader.i + +bb5.preheader.i: ; preds = %bb1.i, %bb2.outer.i + br label %bb5.i + +bb5.i: ; preds = %bb5.i, %bb5.preheader.i + br i1 undef, label %bb5.i, label %bb7.i6 + +bb7.i6: ; preds = %bb5.i + br i1 undef, label %bb9.i, label %_ZSt21__unguarded_partitionIPdd17less_than_functorET_S2_S2_T0_T1_.exit + +bb9.i: ; preds = %bb7.i6 + br label %bb2.outer.i + +_ZSt21__unguarded_partitionIPdd17less_than_functorET_S2_S2_T0_T1_.exit: ; preds = %bb7.i6 + unreachable +} diff --git a/test/CodeGen/X86/2010-03-05-EFLAGS-Redef.ll b/test/CodeGen/X86/2010-03-05-EFLAGS-Redef.ll new file mode 100644 index 0000000000..3cca10e268 --- /dev/null +++ b/test/CodeGen/X86/2010-03-05-EFLAGS-Redef.ll @@ -0,0 +1,49 @@ +; RUN: llc < %s -verify-machineinstrs +; +; This test case is transformed into a single basic block by the machine +; branch folding pass. That makes a complete mess of the %EFLAGS liveness, but +; we don't care about liveness this late anyway. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.2" + +define i32 @main(i32 %argc, i8** nocapture %argv) ssp { +entry: + br i1 undef, label %bb, label %bb2 + +bb: ; preds = %entry + br label %bb2 + +bb2: ; preds = %bb, %entry + br i1 undef, label %bb3, label %bb5 + +bb3: ; preds = %bb2 + br label %bb5 + +bb5: ; preds = %bb3, %bb2 + br i1 undef, label %bb.nph239, label %bb8 + +bb.nph239: ; preds = %bb5 + unreachable + +bb8: ; preds = %bb5 + br i1 undef, label %bb.nph237, label %bb47 + +bb.nph237: ; preds = %bb8 + unreachable + +bb47: ; preds = %bb8 + br i1 undef, label %bb49, label %bb48 + +bb48: ; preds = %bb47 + unreachable + +bb49: ; preds = %bb47 + br i1 undef, label %bb51, label %bb50 + +bb50: ; preds = %bb49 + ret i32 0 + +bb51: ; preds = %bb49 + ret i32 0 +} diff --git a/test/CodeGen/X86/addr-label-difference.ll b/test/CodeGen/X86/addr-label-difference.ll index 547d6b5765..be0908aa1a 100644 --- a/test/CodeGen/X86/addr-label-difference.ll +++ b/test/CodeGen/X86/addr-label-difference.ll @@ -9,14 +9,18 @@ target triple = "i386-apple-darwin10.0" define void @test(i32 %i) nounwind ssp { entry: + call void @test(i32 1) br label %foo -foo: ; preds = %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto +foo: + call void @test(i32 1) br label %bar -bar: ; preds = %foo, %indirectgoto +bar: + call void @test(i32 1) br label %hack -hack: ; preds = %bar, %indirectgoto +hack: + call void @test(i32 1) ret void } diff --git a/test/CodeGen/X86/and-or-fold.ll b/test/CodeGen/X86/and-or-fold.ll index 7733b8a5ba..836b5f1551 100644 --- a/test/CodeGen/X86/and-or-fold.ll +++ b/test/CodeGen/X86/and-or-fold.ll @@ -1,14 +1,26 @@ -; RUN: llc < %s -march=x86 | grep and | count 1 +; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck -check-prefix=DARWIN %s +; RUN: opt < %s -O2 | llc -mtriple=x86_64-apple-darwin 
| FileCheck -check-prefix=DARWIN-OPT %s ; The dag combiner should fold together (x&127)|(y&16711680) -> (x|y)&c1 ; in this case. -define i32 @test6(i32 %x, i16 %y) { - %tmp1 = zext i16 %y to i32 ; <i32> [#uses=1] - %tmp2 = and i32 %tmp1, 127 ; <i32> [#uses=1] - %tmp4 = shl i32 %x, 16 ; <i32> [#uses=1] - %tmp5 = and i32 %tmp4, 16711680 ; <i32> [#uses=1] - %tmp6 = or i32 %tmp2, %tmp5 ; <i32> [#uses=1] - ret i32 %tmp6 +define i32 @test1(i32 %x, i16 %y) { + %tmp1 = zext i16 %y to i32 + %tmp2 = and i32 %tmp1, 127 + %tmp4 = shl i32 %x, 16 + %tmp5 = and i32 %tmp4, 16711680 + %tmp6 = or i32 %tmp2, %tmp5 + ret i32 %tmp6 +; DARWIN: andl $16711807, %eax } +; <rdar://problem/7529774> The optimizer shouldn't fold this into (and (or, C), D) +; if (C & D) == 0 +define i64 @test2(i64 %x) nounwind readnone ssp { +entry: + %tmp1 = and i64 %x, 123127 + %tmp2 = or i64 %tmp1, 3 + ret i64 %tmp2 +; DARWIN-OPT: andq $123124 +; DARWIN-OPT-NEXT: leaq 3 +} diff --git a/test/CodeGen/X86/bswap-inline-asm.ll b/test/CodeGen/X86/bswap-inline-asm.ll index 5bf58fa1d5..2b7019371a 100644 --- a/test/CodeGen/X86/bswap-inline-asm.ll +++ b/test/CodeGen/X86/bswap-inline-asm.ll @@ -1,17 +1,80 @@ ; RUN: llc < %s -march=x86-64 > %t ; RUN: not grep APP %t -; RUN: grep bswapq %t | count 2 -; RUN: grep bswapl %t | count 1 +; RUN: FileCheck %s < %t +; CHECK: foo: +; CHECK: bswapq define i64 @foo(i64 %x) nounwind { %asmtmp = tail call i64 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind ret i64 %asmtmp } + +; CHECK: bar: +; CHECK: bswapq define i64 @bar(i64 %x) nounwind { %asmtmp = tail call i64 asm "bswapq ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind ret i64 %asmtmp } + +; CHECK: pen: +; CHECK: bswapl define i32 @pen(i32 %x) nounwind { %asmtmp = tail call i32 asm "bswapl ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind ret i32 %asmtmp } + +; CHECK: s16: +; CHECK: rolw $8, +define zeroext i16 @s16(i16 zeroext %x) nounwind { + %asmtmp = tail call i16 asm "rorw $$8, 
${0:w}", "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i16 %x) nounwind + ret i16 %asmtmp +} + +; CHECK: t16: +; CHECK: rolw $8, +define zeroext i16 @t16(i16 zeroext %x) nounwind { + %asmtmp = tail call i16 asm "rorw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{fpsr},~{flags}"(i16 %x) nounwind + ret i16 %asmtmp +} + +; CHECK: u16: +; CHECK: rolw $8, +define zeroext i16 @u16(i16 zeroext %x) nounwind { + %asmtmp = tail call i16 asm "rolw $$8, ${0:w}", "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i16 %x) nounwind + ret i16 %asmtmp +} + +; CHECK: v16: +; CHECK: rolw $8, +define zeroext i16 @v16(i16 zeroext %x) nounwind { + %asmtmp = tail call i16 asm "rolw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{fpsr},~{flags}"(i16 %x) nounwind + ret i16 %asmtmp +} + +; CHECK: s32: +; CHECK: bswapl +define i32 @s32(i32 %x) nounwind { + %asmtmp = tail call i32 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind + ret i32 %asmtmp +} + +; CHECK: t32: +; CHECK: bswapl +define i32 @t32(i32 %x) nounwind { + %asmtmp = tail call i32 asm "bswap $0", "=r,0,~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind + ret i32 %asmtmp +} + +; CHECK: s64: +; CHECK: bswapq +define i64 @s64(i64 %x) nounwind { + %asmtmp = tail call i64 asm "bswap ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind + ret i64 %asmtmp +} + +; CHECK: t64: +; CHECK: bswapq +define i64 @t64(i64 %x) nounwind { + %asmtmp = tail call i64 asm "bswap ${0:q}", "=r,0,~{fpsr},~{dirflag},~{flags}"(i64 %x) nounwind + ret i64 %asmtmp +} diff --git a/test/CodeGen/X86/code_placement_eh.ll b/test/CodeGen/X86/code_placement_eh.ll new file mode 100644 index 0000000000..172d5910d0 --- /dev/null +++ b/test/CodeGen/X86/code_placement_eh.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s + +; CodePlacementOpt shouldn't try to modify this loop because +; it involves EH edges. 
+ +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin10.0" + +define void @foo() { +invcont5: + br label %bb15 + +.noexc3: ; preds = %bb15 + br i1 undef, label %bb18.i5.i, label %bb15 + +.noexc6.i.i: ; preds = %bb18.i5.i + %tmp2021 = invoke float @cosf(float 0.000000e+00) readonly + to label %bb18.i5.i unwind label %lpad.i.i ; <float> [#uses=0] + +bb18.i5.i: ; preds = %.noexc6.i.i, %bb51.i + %tmp2019 = invoke float @sinf(float 0.000000e+00) readonly + to label %.noexc6.i.i unwind label %lpad.i.i ; <float> [#uses=0] + +lpad.i.i: ; preds = %bb18.i5.i, %.noexc6.i.i + %eh_ptr.i.i = call i8* @llvm.eh.exception() ; <i8*> [#uses=1] + unreachable + +lpad59.i: ; preds = %bb15 + %eh_ptr60.i = call i8* @llvm.eh.exception() ; <i8*> [#uses=1] + unreachable + +bb15: ; preds = %.noexc3, %invcont5 + invoke fastcc void @_ZN28btHashedOverlappingPairCacheC2Ev() + to label %.noexc3 unwind label %lpad59.i +} + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare float @sinf(float) readonly + +declare float @cosf(float) readonly + +declare fastcc void @_ZN28btHashedOverlappingPairCacheC2Ev() align 2 diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll new file mode 100644 index 0000000000..1e13046f2a --- /dev/null +++ b/test/CodeGen/X86/crash.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=x86 %s -o - +; RUN: llc -march=x86-64 %s -o - + +; PR6497 + +; Chain and flag folding issues. 
+define i32 @test1() nounwind ssp { +entry: + %tmp5.i = volatile load i32* undef ; <i32> [#uses=1] + %conv.i = zext i32 %tmp5.i to i64 ; <i64> [#uses=1] + %tmp12.i = volatile load i32* undef ; <i32> [#uses=1] + %conv13.i = zext i32 %tmp12.i to i64 ; <i64> [#uses=1] + %shl.i = shl i64 %conv13.i, 32 ; <i64> [#uses=1] + %or.i = or i64 %shl.i, %conv.i ; <i64> [#uses=1] + %add16.i = add i64 %or.i, 256 ; <i64> [#uses=1] + %shr.i = lshr i64 %add16.i, 8 ; <i64> [#uses=1] + %conv19.i = trunc i64 %shr.i to i32 ; <i32> [#uses=1] + volatile store i32 %conv19.i, i32* undef + ret i32 undef +} diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll index 4fe554de75..f29cbf323e 100644 --- a/test/CodeGen/X86/critical-edge-split.ll +++ b/test/CodeGen/X86/critical-edge-split.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -tailcallopt=false -stats -info-output-file - | grep asm-printer | grep 31 +; RUN: llc < %s -mtriple=i386-apple-darwin -stats -info-output-file - | grep asm-printer | grep 29 %CC = type { %Register } %II = type { %"struct.XX::II::$_74" } diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll new file mode 100644 index 0000000000..2c699bfb0d --- /dev/null +++ b/test/CodeGen/X86/dllexport.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s | FileCheck %s +; PR2936 + +target triple = "i386-mingw32" + +define dllexport x86_fastcallcc i32 @foo() nounwind { +entry: + ret i32 0 +} + +; CHECK: .section .drectve +; CHECK: -export:@foo@0
\ No newline at end of file diff --git a/test/CodeGen/X86/fastcall-correct-mangling.ll b/test/CodeGen/X86/fastcall-correct-mangling.ll index 2b48f5f371..33b18bb8cc 100644 --- a/test/CodeGen/X86/fastcall-correct-mangling.ll +++ b/test/CodeGen/X86/fastcall-correct-mangling.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -mtriple=i386-unknown-mingw32 | \ -; RUN: grep {@12} +; RUN: llc < %s -mtriple=i386-unknown-mingw32 | FileCheck %s ; Check that a fastcall function gets correct mangling define x86_fastcallcc void @func(i64 %X, i8 %Y, i8 %G, i16 %Z) { +; CHECK: @func@20: ret void } diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll index 3bd58b65be..ff9b1b0b6a 100644 --- a/test/CodeGen/X86/full-lsr.ll +++ b/test/CodeGen/X86/full-lsr.ll @@ -1,12 +1,7 @@ ; RUN: llc < %s -march=x86 >%t -; TODO: Enhance full lsr mode to get this: -; RUNX: grep {addl \\\$4,} %t | count 3 -; RUNX: not grep {,%} %t - -; For now, it should find this, which is still pretty good: -; RUN: not grep {addl \\\$4,} %t -; RUN: grep {,%} %t | count 6 +; RUN: grep {addl \\\$4,} %t | count 3 +; RUN: not grep {,%} %t define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind { entry: diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll index 1a7b5777ae..d79c56bc46 100644 --- a/test/CodeGen/X86/global-sections.ll +++ b/test/CodeGen/X86/global-sections.ll @@ -100,7 +100,7 @@ @G8 = constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ] -; DARWIN: .section __TEXT,__ustring +; DARWIN: .section __TEXT,__const ; DARWIN: .globl _G8 ; DARWIN: _G8: @@ -110,7 +110,6 @@ @G9 = constant [4 x i32] [ i32 1, i32 2, i32 3, i32 0 ] -; DARWIN: .section __TEXT,__const ; DARWIN: .globl _G9 ; DARWIN: _G9: diff --git a/test/CodeGen/X86/ins_subreg_coalesce-3.ll b/test/CodeGen/X86/ins_subreg_coalesce-3.ll index 627edc51c1..8c1c409766 100644 --- a/test/CodeGen/X86/ins_subreg_coalesce-3.ll +++ b/test/CodeGen/X86/ins_subreg_coalesce-3.ll @@ -1,4 +1,4 @@ -; 
RUN: llc < %s -march=x86-64 | grep mov | count 5 +; RUN: llc < %s -march=x86-64 | grep mov | count 3 %struct.COMPOSITE = type { i8, i16, i16 } %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll index c695c29e06..408fb20b8d 100644 --- a/test/CodeGen/X86/iv-users-in-other-loops.ll +++ b/test/CodeGen/X86/iv-users-in-other-loops.ll @@ -1,11 +1,11 @@ ; RUN: llc < %s -march=x86-64 -o %t -; RUN: grep inc %t | count 1 +; RUN: not grep inc %t ; RUN: grep dec %t | count 2 ; RUN: grep addq %t | count 13 ; RUN: not grep addb %t -; RUN: grep leaq %t | count 9 -; RUN: grep leal %t | count 3 -; RUN: grep movq %t | count 5 +; RUN: not grep leaq %t +; RUN: not grep leal %t +; RUN: not grep movq %t ; IV users in each of the loops from other loops shouldn't cause LSR ; to insert new induction variables. Previously it would create a diff --git a/test/CodeGen/X86/licm-symbol.ll b/test/CodeGen/X86/licm-symbol.ll new file mode 100644 index 0000000000..d61bbfccbc --- /dev/null +++ b/test/CodeGen/X86/licm-symbol.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s | FileCheck %s + +; MachineLICM should be able to hoist the sF reference out of the loop. 
+ +; CHECK: pushl %esi +; CHECK: subl $8, %esp +; CHECK: movl $176, %esi +; CHECK: addl L___sF$non_lazy_ptr, %esi +; CHECK: .align 4, 0x90 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin8" + +%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } +%struct.__sFILEX = type opaque +%struct.__sbuf = type { i8*, i32 } +%struct.gcov_ctr_summary = type { i32, i32, i64, i64, i64 } +%struct.gcov_summary = type { i32, [1 x %struct.gcov_ctr_summary] } + +@__sF = external global [0 x %struct.FILE] ; <[0 x %struct.FILE]*> [#uses=1] + +declare i32 @fprintf(%struct.FILE* nocapture) nounwind + +define void @gcov_exit() nounwind { +entry: + br label %bb151 + +bb151: ; preds = %bb59, %bb56, %bb14 + br i1 undef, label %bb56, label %bb59 + +bb56: ; preds = %bb151 + %t0 = call i32 (%struct.FILE*)* @fprintf(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2)) nounwind + br label %bb151 + +bb59: ; preds = %bb151 + %t1 = call i32 (%struct.FILE*)* @fprintf(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2)) nounwind + br label %bb151 +} + diff --git a/test/CodeGen/X86/loop-strength-reduce-2.ll b/test/CodeGen/X86/loop-strength-reduce-2.ll index 30b5114349..b546462b68 100644 --- a/test/CodeGen/X86/loop-strength-reduce-2.ll +++ b/test/CodeGen/X86/loop-strength-reduce-2.ll @@ -1,11 +1,24 @@ -; RUN: llc < %s -march=x86 -relocation-model=pic | \ -; RUN: grep {, 4} | count 1 -; RUN: llc < %s -march=x86 | not grep lea +; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC +; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s -check-prefix=STATIC ; ; Make sure the common loop 
invariant A is hoisted up to preheader, ; since too many registers are needed to subsume it into the addressing modes. ; It's safe to sink A in when it's not pic. +; PIC: align +; PIC: movl $4, -4([[REG:%e[a-z]+]]) +; PIC: movl $5, ([[REG]]) +; PIC: addl $4, [[REG]] +; PIC: decl {{%e[[a-z]+}} +; PIC: jne + +; STATIC: align +; STATIC: movl $4, -4(%ecx) +; STATIC: movl $5, (%ecx) +; STATIC: addl $4, %ecx +; STATIC: decl %eax +; STATIC: jne + @A = global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2] define void @test(i32 %row, i32 %N.in) nounwind { diff --git a/test/CodeGen/X86/loop-strength-reduce-3.ll b/test/CodeGen/X86/loop-strength-reduce-3.ll index 70c91340c9..b1c9fb9c07 100644 --- a/test/CodeGen/X86/loop-strength-reduce-3.ll +++ b/test/CodeGen/X86/loop-strength-reduce-3.ll @@ -1,8 +1,11 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | \ -; RUN: grep {A+} | count 2 -; -; Make sure the common loop invariant A is not hoisted up to preheader, -; since it can be subsumed it into the addressing modes. +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s + +; CHECK: align +; CHECK: movl $4, -4(%ecx) +; CHECK: movl $5, (%ecx) +; CHECK: addl $4, %ecx +; CHECK: decl %eax +; CHECK: jne @A = global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2] diff --git a/test/CodeGen/X86/loop-strength-reduce.ll b/test/CodeGen/X86/loop-strength-reduce.ll index 4cb56ca9ed..42c6ac4983 100644 --- a/test/CodeGen/X86/loop-strength-reduce.ll +++ b/test/CodeGen/X86/loop-strength-reduce.ll @@ -1,8 +1,11 @@ -; RUN: llc < %s -march=x86 -relocation-model=static | \ -; RUN: grep {A+} | count 2 -; -; Make sure the common loop invariant A is not hoisted up to preheader, -; since it can be subsumed into the addressing mode in all uses. 
+; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s + +; CHECK: align +; CHECK: movl $4, -4(%ecx) +; CHECK: movl $5, (%ecx) +; CHECK: addl $4, %ecx +; CHECK: decl %eax +; CHECK: jne @A = internal global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2] diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll index 07e46eca75..6c0eb8c0df 100644 --- a/test/CodeGen/X86/loop-strength-reduce4.ll +++ b/test/CodeGen/X86/loop-strength-reduce4.ll @@ -1,5 +1,19 @@ -; RUN: llc < %s -march=x86 | grep cmp | grep 64 -; RUN: llc < %s -march=x86 | not grep inc +; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC +; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC + +; By starting the IV at -64 instead of 0, a cmp is eliminated, +; as the flags from the add can be used directly. + +; STATIC: movl $-64, %ecx + +; STATIC: movl %eax, _state+76(%ecx) +; STATIC: addl $16, %ecx +; STATIC: jne + +; In PIC mode the symbol can't be folded, so the change-compare-stride +; trick applies. 
+ +; PIC: cmpl $64 @state = external global [0 x i32] ; <[0 x i32]*> [#uses=4] @S = external global [0 x i32] ; <[0 x i32]*> [#uses=4] diff --git a/test/CodeGen/X86/loop-strength-reduce8.ll b/test/CodeGen/X86/loop-strength-reduce8.ll index e14cd8a99e..6b2247d1d6 100644 --- a/test/CodeGen/X86/loop-strength-reduce8.ll +++ b/test/CodeGen/X86/loop-strength-reduce8.ll @@ -1,4 +1,10 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin | grep leal | not grep 16 +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s + +; CHECK: leal 16(%eax), %edx +; CHECK: align +; CHECK: addl $4, %edx +; CHECK: decl %ecx +; CHECK: jne LBB1_2 %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 } %struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] } diff --git a/test/CodeGen/X86/lsr-overflow.ll b/test/CodeGen/X86/lsr-overflow.ll new file mode 100644 index 0000000000..0b0214c6d9 --- /dev/null +++ b/test/CodeGen/X86/lsr-overflow.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +; The comparison uses the pre-inc value, which could lead LSR to +; try to compute -INT64_MIN. 
+ +; CHECK: movabsq $-9223372036854775808, %rax +; CHECK: cmpq %rax, %rbx +; CHECK: sete %al + +declare i64 @bar() + +define i1 @foo() nounwind { +entry: + br label %for.cond.i + +for.cond.i: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.cond.i ] + %t = call i64 @bar() + %indvar.next = add i64 %indvar, 1 + %s = icmp ne i64 %indvar.next, %t + br i1 %s, label %for.cond.i, label %__ABContainsLabel.exit + +__ABContainsLabel.exit: + %cmp = icmp eq i64 %indvar, 9223372036854775807 + ret i1 %cmp +} diff --git a/test/CodeGen/X86/lsr-reuse-trunc.ll b/test/CodeGen/X86/lsr-reuse-trunc.ll new file mode 100644 index 0000000000..d1d714491f --- /dev/null +++ b/test/CodeGen/X86/lsr-reuse-trunc.ll @@ -0,0 +1,59 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +; Full strength reduction wouldn't reduce register pressure, so LSR should +; stick with indexing here. + +; CHECK: movaps (%rsi,%rax,4), %xmm3 +; CHECK: movaps %xmm3, (%rdi,%rax,4) +; CHECK: addq $4, %rax +; CHECK: cmpl %eax, (%rdx) +; CHECK-NEXT: jg + +define void @vvfloorf(float* nocapture %y, float* nocapture %x, i32* nocapture %n) nounwind { +entry: + %0 = load i32* %n, align 4 + %1 = icmp sgt i32 %0, 0 + br i1 %1, label %bb, label %return + +bb: + %indvar = phi i64 [ %indvar.next, %bb ], [ 0, %entry ] + %tmp = shl i64 %indvar, 2 + %scevgep = getelementptr float* %y, i64 %tmp + %scevgep9 = bitcast float* %scevgep to <4 x float>* + %scevgep10 = getelementptr float* %x, i64 %tmp + %scevgep1011 = bitcast float* %scevgep10 to <4 x float>* + %2 = load <4 x float>* %scevgep1011, align 16 + %3 = bitcast <4 x float> %2 to <4 x i32> + %4 = and <4 x i32> %3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> + %5 = bitcast <4 x i32> %4 to <4 x float> + %6 = and <4 x i32> %3, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> + %7 = tail call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %5, <4 x float> <float 8.388608e+06, float 8.388608e+06, float 8.388608e+06, float 8.388608e+06>, i8 5) 
nounwind + %tmp.i4 = bitcast <4 x float> %7 to <4 x i32> + %8 = xor <4 x i32> %tmp.i4, <i32 -1, i32 -1, i32 -1, i32 -1> + %9 = and <4 x i32> %8, <i32 1258291200, i32 1258291200, i32 1258291200, i32 1258291200> + %10 = or <4 x i32> %9, %6 + %11 = bitcast <4 x i32> %10 to <4 x float> + %12 = fadd <4 x float> %2, %11 + %13 = fsub <4 x float> %12, %11 + %14 = tail call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %2, <4 x float> %13, i8 1) nounwind + %15 = bitcast <4 x float> %14 to <4 x i32> + %16 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %15) nounwind readnone + %17 = fadd <4 x float> %13, %16 + %tmp.i = bitcast <4 x float> %17 to <4 x i32> + %18 = or <4 x i32> %tmp.i, %6 + %19 = bitcast <4 x i32> %18 to <4 x float> + store <4 x float> %19, <4 x float>* %scevgep9, align 16 + %tmp12 = add i64 %tmp, 4 + %tmp13 = trunc i64 %tmp12 to i32 + %20 = load i32* %n, align 4 + %21 = icmp sgt i32 %20, %tmp13 + %indvar.next = add i64 %indvar, 1 + br i1 %21, label %bb, label %return + +return: + ret void +} + +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone + +declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll new file mode 100644 index 0000000000..2f6fb3fa8b --- /dev/null +++ b/test/CodeGen/X86/lsr-reuse.ll @@ -0,0 +1,442 @@ +; RUN: llc < %s -march=x86-64 -O3 -asm-verbose=false | FileCheck %s +target datalayout = "e-p:64:64:64" +target triple = "x86_64-unknown-unknown" + +; Full strength reduction reduces register pressure from 5 to 4 here. +; Instruction selection should use the FLAGS value from the dec for +; the branch. Scheduling should push the adds upwards. 
+ +; CHECK: full_me_0: +; CHECK: movsd (%rsi), %xmm0 +; CHECK: addq $8, %rsi +; CHECK: mulsd (%rdx), %xmm0 +; CHECK: addq $8, %rdx +; CHECK: movsd %xmm0, (%rdi) +; CHECK: addq $8, %rdi +; CHECK: decq %rcx +; CHECK: jne + +define void @full_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; Mostly-full strength reduction means we do full strength reduction on all +; except for the offsets. +; +; Given a choice between constant offsets -2048 and 2048, choose the negative +; value, because at boundary conditions it has a smaller encoding. +; TODO: That's an over-general heuristic. It would be better for the target +; to indicate what the encoding cost would be. Then using a 2048 offset +; would be better on x86-64, since the start value would be 0 instead of +; 2048. 
+ +; CHECK: mostly_full_me_0: +; CHECK: movsd -2048(%rsi), %xmm0 +; CHECK: mulsd -2048(%rdx), %xmm0 +; CHECK: movsd %xmm0, -2048(%rdi) +; CHECK: movsd (%rsi), %xmm0 +; CHECK: addq $8, %rsi +; CHECK: divsd (%rdx), %xmm0 +; CHECK: addq $8, %rdx +; CHECK: movsd %xmm0, (%rdi) +; CHECK: addq $8, %rdi +; CHECK: decq %rcx +; CHECK: jne + +define void @mostly_full_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %j = add i64 %i, 256 + %Aj = getelementptr inbounds double* %A, i64 %j + %Bj = getelementptr inbounds double* %B, i64 %j + %Cj = getelementptr inbounds double* %C, i64 %j + %t3 = load double* %Bj + %t4 = load double* %Cj + %o = fdiv double %t3, %t4 + store double %o, double* %Aj + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; A minor variation on mostly_full_me_0. +; Prefer to start the indvar at 0. 
+ +; CHECK: mostly_full_me_1: +; CHECK: movsd (%rsi), %xmm0 +; CHECK: mulsd (%rdx), %xmm0 +; CHECK: movsd %xmm0, (%rdi) +; CHECK: movsd -2048(%rsi), %xmm0 +; CHECK: addq $8, %rsi +; CHECK: divsd -2048(%rdx), %xmm0 +; CHECK: addq $8, %rdx +; CHECK: movsd %xmm0, -2048(%rdi) +; CHECK: addq $8, %rdi +; CHECK: decq %rcx +; CHECK: jne + +define void @mostly_full_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %j = sub i64 %i, 256 + %Aj = getelementptr inbounds double* %A, i64 %j + %Bj = getelementptr inbounds double* %B, i64 %j + %Cj = getelementptr inbounds double* %C, i64 %j + %t3 = load double* %Bj + %t4 = load double* %Cj + %o = fdiv double %t3, %t4 + store double %o, double* %Aj + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; A slightly less minor variation on mostly_full_me_0. 
+ +; CHECK: mostly_full_me_2: +; CHECK: movsd (%rsi), %xmm0 +; CHECK: mulsd (%rdx), %xmm0 +; CHECK: movsd %xmm0, (%rdi) +; CHECK: movsd -4096(%rsi), %xmm0 +; CHECK: addq $8, %rsi +; CHECK: divsd -4096(%rdx), %xmm0 +; CHECK: addq $8, %rdx +; CHECK: movsd %xmm0, -4096(%rdi) +; CHECK: addq $8, %rdi +; CHECK: decq %rcx +; CHECK: jne + +define void @mostly_full_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %k = add i64 %i, 256 + %Ak = getelementptr inbounds double* %A, i64 %k + %Bk = getelementptr inbounds double* %B, i64 %k + %Ck = getelementptr inbounds double* %C, i64 %k + %t1 = load double* %Bk + %t2 = load double* %Ck + %m = fmul double %t1, %t2 + store double %m, double* %Ak + %j = sub i64 %i, 256 + %Aj = getelementptr inbounds double* %A, i64 %j + %Bj = getelementptr inbounds double* %B, i64 %j + %Cj = getelementptr inbounds double* %C, i64 %j + %t3 = load double* %Bj + %t4 = load double* %Cj + %o = fdiv double %t3, %t4 + store double %o, double* %Aj + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; In this test, the counting IV exit value is used, so full strength reduction +; would not reduce register pressure. IndVarSimplify ought to simplify such +; cases away, but it's useful here to verify that LSR's register pressure +; heuristics are working as expected. 
+ +; CHECK: count_me_0: +; CHECK: movsd (%rsi,%rax,8), %xmm0 +; CHECK: mulsd (%rdx,%rax,8), %xmm0 +; CHECK: movsd %xmm0, (%rdi,%rax,8) +; CHECK: incq %rax +; CHECK: cmpq %rax, %rcx +; CHECK: jne + +define i64 @count_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + %q = phi i64 [ 0, %entry ], [ %i.next, %loop ] + ret i64 %q +} + +; In this test, the trip count value is used, so full strength reduction +; would not reduce register pressure. +; (though it would reduce register pressure inside the loop...) 
+ +; CHECK: count_me_1: +; CHECK: movsd (%rsi,%rax,8), %xmm0 +; CHECK: mulsd (%rdx,%rax,8), %xmm0 +; CHECK: movsd %xmm0, (%rdi,%rax,8) +; CHECK: incq %rax +; CHECK: cmpq %rax, %rcx +; CHECK: jne + +define i64 @count_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + %q = phi i64 [ 0, %entry ], [ %n, %loop ] + ret i64 %q +} + +; Full strength reduction doesn't save any registers here because the +; loop tripcount is a constant. + +; CHECK: count_me_2: +; CHECK: movl $10, %eax +; CHECK: align +; CHECK: BB7_1: +; CHECK: movsd -40(%rdi,%rax,8), %xmm0 +; CHECK: addsd -40(%rsi,%rax,8), %xmm0 +; CHECK: movsd %xmm0, -40(%rdx,%rax,8) +; CHECK: movsd (%rdi,%rax,8), %xmm0 +; CHECK: subsd (%rsi,%rax,8), %xmm0 +; CHECK: movsd %xmm0, (%rdx,%rax,8) +; CHECK: incq %rax +; CHECK: cmpq $5010, %rax +; CHECK: jne + +define void @count_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C) nounwind { +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop ] + %i5 = add i64 %i, 5 + %Ai = getelementptr double* %A, i64 %i5 + %t2 = load double* %Ai + %Bi = getelementptr double* %B, i64 %i5 + %t4 = load double* %Bi + %t5 = fadd double %t2, %t4 + %Ci = getelementptr double* %C, i64 %i5 + store double %t5, double* %Ci + %i10 = add i64 %i, 10 + %Ai10 = getelementptr double* %A, i64 %i10 + %t9 = load double* %Ai10 + %Bi10 = getelementptr double* %B, i64 %i10 + %t11 = load double* %Bi10 + %t12 = fsub double 
%t9, %t11 + %Ci10 = getelementptr double* %C, i64 %i10 + store double %t12, double* %Ci10 + %i.next = add i64 %i, 1 + %exitcond = icmp eq i64 %i.next, 5000 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; This should be fully strength-reduced to reduce register pressure. + +; CHECK: full_me_1: +; CHECK: align +; CHECK: BB8_1: +; CHECK: movsd (%rdi), %xmm0 +; CHECK: addsd (%rsi), %xmm0 +; CHECK: movsd %xmm0, (%rdx) +; CHECK: movsd 40(%rdi), %xmm0 +; CHECK: addq $8, %rdi +; CHECK: subsd 40(%rsi), %xmm0 +; CHECK: addq $8, %rsi +; CHECK: movsd %xmm0, 40(%rdx) +; CHECK: addq $8, %rdx +; CHECK: decq %rcx +; CHECK: jne + +define void @full_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop ] + %i5 = add i64 %i, 5 + %Ai = getelementptr double* %A, i64 %i5 + %t2 = load double* %Ai + %Bi = getelementptr double* %B, i64 %i5 + %t4 = load double* %Bi + %t5 = fadd double %t2, %t4 + %Ci = getelementptr double* %C, i64 %i5 + store double %t5, double* %Ci + %i10 = add i64 %i, 10 + %Ai10 = getelementptr double* %A, i64 %i10 + %t9 = load double* %Ai10 + %Bi10 = getelementptr double* %B, i64 %i10 + %t11 = load double* %Bi10 + %t12 = fsub double %t9, %t11 + %Ci10 = getelementptr double* %C, i64 %i10 + store double %t12, double* %Ci10 + %i.next = add i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; This is a variation on full_me_0 in which the 0,+,1 induction variable +; has a non-address use, pinning that value in a register. 
+ +; CHECK: count_me_3: +; CHECK: call +; CHECK: movsd (%r15,%r13,8), %xmm0 +; CHECK: mulsd (%r14,%r13,8), %xmm0 +; CHECK: movsd %xmm0, (%r12,%r13,8) +; CHECK: incq %r13 +; CHECK: cmpq %r13, %rbx +; CHECK: jne + +declare void @use(i64) + +define void @count_me_3(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + call void @use(i64 %i) + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; LSR should use only one indvar for the inner loop. +; rdar://7657764 + +; CHECK: asd: +; CHECK: BB10_5: +; CHECK-NEXT: addl (%r{{[^,]*}},%rdi,4), %e +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: cmpq %rdi, %r{{[^,]*}} +; CHECK-NEXT: jg + +%struct.anon = type { i32, [4200 x i32] } + +@bars = common global [123123 x %struct.anon] zeroinitializer, align 32 ; <[123123 x %struct.anon]*> [#uses=2] + +define i32 @asd(i32 %n) nounwind readonly { +entry: + %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1] + br i1 %0, label %bb.nph14, label %bb5 + +bb.nph14: ; preds = %entry + %tmp18 = zext i32 %n to i64 ; <i64> [#uses=1] + br label %bb + +bb: ; preds = %bb3, %bb.nph14 + %indvar16 = phi i64 [ 0, %bb.nph14 ], [ %indvar.next17, %bb3 ] ; <i64> [#uses=3] + %s.113 = phi i32 [ 0, %bb.nph14 ], [ %s.0.lcssa, %bb3 ] ; <i32> [#uses=2] + %scevgep2526 = getelementptr [123123 x %struct.anon]* @bars, i64 0, i64 %indvar16, i32 0 ; <i32*> [#uses=1] + %1 = load i32* %scevgep2526, align 4 ; <i32> [#uses=2] + %2 = icmp sgt i32 %1, 0 ; <i1> [#uses=1] + br i1 %2, label %bb.nph, label %bb3 + +bb.nph: ; preds = %bb + 
%tmp23 = sext i32 %1 to i64 ; <i64> [#uses=1] + br label %bb1 + +bb1: ; preds = %bb.nph, %bb1 + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp19, %bb1 ] ; <i64> [#uses=2] + %s.07 = phi i32 [ %s.113, %bb.nph ], [ %4, %bb1 ] ; <i32> [#uses=1] + %c.08 = getelementptr [123123 x %struct.anon]* @bars, i64 0, i64 %indvar16, i32 1, i64 %indvar ; <i32*> [#uses=1] + %3 = load i32* %c.08, align 4 ; <i32> [#uses=1] + %4 = add nsw i32 %3, %s.07 ; <i32> [#uses=2] + %tmp19 = add i64 %indvar, 1 ; <i64> [#uses=2] + %5 = icmp sgt i64 %tmp23, %tmp19 ; <i1> [#uses=1] + br i1 %5, label %bb1, label %bb3 + +bb3: ; preds = %bb1, %bb + %s.0.lcssa = phi i32 [ %s.113, %bb ], [ %4, %bb1 ] ; <i32> [#uses=2] + %indvar.next17 = add i64 %indvar16, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next17, %tmp18 ; <i1> [#uses=1] + br i1 %exitcond, label %bb5, label %bb + +bb5: ; preds = %bb3, %entry + %s.1.lcssa = phi i32 [ 0, %entry ], [ %s.0.lcssa, %bb3 ] ; <i32> [#uses=1] + ret i32 %s.1.lcssa +} diff --git a/test/CodeGen/X86/lsr-wrap.ll b/test/CodeGen/X86/lsr-wrap.ll new file mode 100644 index 0000000000..ec8db501ef --- /dev/null +++ b/test/CodeGen/X86/lsr-wrap.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +; LSR would like to use a single IV for both of these, however it's +; not safe due to wraparound. 
+ +; CHECK: addb $-4, %r +; CHECK: decw % + +@g_19 = common global i32 0 ; <i32*> [#uses=2] + +declare i32 @func_8(i8 zeroext) nounwind + +declare i32 @func_3(i8 signext) nounwind + +define void @func_1() nounwind { +entry: + br label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i16 [ 0, %entry ], [ %indvar.next, %bb ] ; <i16> [#uses=2] + %tmp = sub i16 0, %indvar ; <i16> [#uses=1] + %tmp27 = trunc i16 %tmp to i8 ; <i8> [#uses=1] + %tmp1 = load i32* @g_19, align 4 ; <i32> [#uses=2] + %tmp2 = add i32 %tmp1, 1 ; <i32> [#uses=1] + store i32 %tmp2, i32* @g_19, align 4 + %tmp3 = trunc i32 %tmp1 to i8 ; <i8> [#uses=1] + %tmp4 = tail call i32 @func_8(i8 zeroext %tmp3) nounwind ; <i32> [#uses=0] + %tmp5 = shl i8 %tmp27, 2 ; <i8> [#uses=1] + %tmp6 = add i8 %tmp5, -112 ; <i8> [#uses=1] + %tmp7 = tail call i32 @func_3(i8 signext %tmp6) nounwind ; <i32> [#uses=0] + %indvar.next = add i16 %indvar, 1 ; <i16> [#uses=2] + %exitcond = icmp eq i16 %indvar.next, -28 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb + ret void +} diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll index bc493bd8f7..0b4d73a683 100644 --- a/test/CodeGen/X86/masked-iv-safe.ll +++ b/test/CodeGen/X86/masked-iv-safe.ll @@ -169,7 +169,7 @@ loop: %indvar.i24 = and i64 %indvar, 16777215 %t3 = getelementptr double* %d, i64 %indvar.i24 %t4 = load double* %t3 - %t5 = fmul double %t4, 2.3 + %t5 = fdiv double %t4, 2.3 store double %t5, double* %t3 %t6 = getelementptr double* %d, i64 %indvar %t7 = load double* %t6 @@ -199,7 +199,7 @@ loop: %indvar.i24 = ashr i64 %s1, 24 %t3 = getelementptr double* %d, i64 %indvar.i24 %t4 = load double* %t3 - %t5 = fmul double %t4, 2.3 + %t5 = fdiv double %t4, 2.3 store double %t5, double* %t3 %t6 = getelementptr double* %d, i64 %indvar %t7 = load double* %t6 @@ -229,7 +229,7 @@ loop: %indvar.i24 = ashr i64 %s1, 24 %t3 = getelementptr double* %d, i64 %indvar.i24 %t4 = load double* %t3 - %t5 = fmul double %t4, 
2.3 + %t5 = fdiv double %t4, 2.3 store double %t5, double* %t3 %t6 = getelementptr double* %d, i64 %indvar %t7 = load double* %t6 diff --git a/test/CodeGen/X86/omit-label.ll b/test/CodeGen/X86/omit-label.ll deleted file mode 100644 index 0ec03ebace..0000000000 --- a/test/CodeGen/X86/omit-label.ll +++ /dev/null @@ -1,57 +0,0 @@ -; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-linux-gnu | FileCheck %s -; PR4126 -; PR4732 - -; Don't omit these labels' definitions. - -; CHECK: bux: -; CHECK: LBB1_1: - -define void @bux(i32 %p_53) nounwind optsize { -entry: - %0 = icmp eq i32 %p_53, 0 ; <i1> [#uses=1] - %1 = icmp sgt i32 %p_53, 0 ; <i1> [#uses=1] - %or.cond = and i1 %0, %1 ; <i1> [#uses=1] - br i1 %or.cond, label %bb.i, label %bb3 - -bb.i: ; preds = %entry - %2 = add i32 %p_53, 1 ; <i32> [#uses=1] - %3 = icmp slt i32 %2, 0 ; <i1> [#uses=0] - br label %bb3 - -bb3: ; preds = %bb.i, %entry - %4 = tail call i32 (...)* @baz(i32 0) nounwind ; <i32> [#uses=0] - ret void -} - -declare i32 @baz(...) - -; Don't omit this label in the assembly output. 
-; CHECK: int321: -; CHECK: LBB2_1 -; CHECK: LBB2_1 -; CHECK: LBB2_1: - -define void @int321(i8 signext %p_103, i32 %uint8p_104) nounwind readnone { -entry: - %tobool = icmp eq i8 %p_103, 0 ; <i1> [#uses=1] - %cmp.i = icmp sgt i8 %p_103, 0 ; <i1> [#uses=1] - %or.cond = and i1 %tobool, %cmp.i ; <i1> [#uses=1] - br i1 %or.cond, label %land.end.i, label %for.cond.preheader - -land.end.i: ; preds = %entry - %conv3.i = sext i8 %p_103 to i32 ; <i32> [#uses=1] - %div.i = sdiv i32 1, %conv3.i ; <i32> [#uses=1] - %tobool.i = icmp eq i32 %div.i, -2147483647 ; <i1> [#uses=0] - br label %for.cond.preheader - -for.cond.preheader: ; preds = %land.end.i, %entry - %cmp = icmp sgt i8 %p_103, 1 ; <i1> [#uses=1] - br i1 %cmp, label %for.end.split, label %for.cond - -for.cond: ; preds = %for.cond.preheader, %for.cond - br label %for.cond - -for.end.split: ; preds = %for.cond.preheader - ret void -} diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll index 12736cda4c..6a08dae51f 100644 --- a/test/CodeGen/X86/pr1505b.ll +++ b/test/CodeGen/X86/pr1505b.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mcpu=i486 | grep fstpl | count 4 -; RUN: llc < %s -mcpu=i486 | grep fstps | count 3 +; RUN: llc < %s -mcpu=i486 | grep fstpl | count 5 +; RUN: llc < %s -mcpu=i486 | grep fstps | count 2 ; PR1505 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" diff --git a/test/CodeGen/X86/pr3495-2.ll b/test/CodeGen/X86/pr3495-2.ll index 71aa5a0488..98c064a07d 100644 --- a/test/CodeGen/X86/pr3495-2.ll +++ b/test/CodeGen/X86/pr3495-2.ll @@ -1,4 +1,8 @@ -; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of reloads omited} +; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of loads added} | grep 1 +; PR3495 +; +; This test may not be testing what it was supposed to test. 
+; It used to have two spills and four reloads, but not it only has one spill and one reload. target datalayout = "e-p:32:32:32" target triple = "i386-apple-darwin9.6" diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll index 1795970d35..e84a84f59b 100644 --- a/test/CodeGen/X86/pr3495.ll +++ b/test/CodeGen/X86/pr3495.ll @@ -1,8 +1,7 @@ ; RUN: llc < %s -march=x86 -stats |& grep {Number of loads added} | grep 2 ; RUN: llc < %s -march=x86 -stats |& grep {Number of register spills} | grep 1 -; RUN: llc < %s -march=x86 -stats |& grep {Number of machine instrs printed} | grep 38 +; RUN: llc < %s -march=x86 -stats |& grep {Number of machine instrs printed} | grep 34 ; PR3495 -; The loop reversal kicks in once here, resulting in one fewer instruction. target triple = "i386-pc-linux-gnu" @x = external global [8 x i32], align 32 ; <[8 x i32]*> [#uses=1] diff --git a/test/CodeGen/X86/pre-split8.ll b/test/CodeGen/X86/pre-split8.ll index ea4b9496b3..0684bd036c 100644 --- a/test/CodeGen/X86/pre-split8.ll +++ b/test/CodeGen/X86/pre-split8.ll @@ -20,7 +20,7 @@ bb: ; preds = %bb9.i, %entry bb9.i: ; preds = %bb %2 = fsub double %.rle4, %0 ; <double> [#uses=0] - %3 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=0] + %3 = tail call double @asin(double %.rle4) nounwind readonly ; <double> [#uses=0] %4 = fmul double 0.000000e+00, %0 ; <double> [#uses=1] %5 = tail call double @tan(double 0.000000e+00) nounwind readonly ; <double> [#uses=0] %6 = fmul double %4, 0.000000e+00 ; <double> [#uses=1] diff --git a/test/CodeGen/X86/pre-split9.ll b/test/CodeGen/X86/pre-split9.ll index c27d925d43..86dda33533 100644 --- a/test/CodeGen/X86/pre-split9.ll +++ b/test/CodeGen/X86/pre-split9.ll @@ -22,7 +22,7 @@ bb: ; preds = %bb9.i, %entry bb9.i: ; preds = %bb %2 = fsub double %.rle4, %0 ; <double> [#uses=0] - %3 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=0] + %3 = tail call double @asin(double %.rle4) nounwind 
readonly ; <double> [#uses=0] %4 = tail call double @sin(double 0.000000e+00) nounwind readonly ; <double> [#uses=1] %5 = fmul double %4, %0 ; <double> [#uses=1] %6 = tail call double @tan(double 0.000000e+00) nounwind readonly ; <double> [#uses=0] diff --git a/test/CodeGen/X86/ptrtoint-constexpr.ll b/test/CodeGen/X86/ptrtoint-constexpr.ll index dd9790568a..d1cb34bec8 100644 --- a/test/CodeGen/X86/ptrtoint-constexpr.ll +++ b/test/CodeGen/X86/ptrtoint-constexpr.ll @@ -9,6 +9,6 @@ ; CHECK: .globl x ; CHECK: x: -; CHECK: .quad 3 +; CHECK: .quad ((0+1)&4294967295)*3 @x = global i64 mul (i64 3, i64 ptrtoint (i2* getelementptr (i2* null, i64 1) to i64)) diff --git a/test/CodeGen/X86/scalar_widen_div.ll b/test/CodeGen/X86/scalar_widen_div.ll index fc67e4417c..77f320f105 100644 --- a/test/CodeGen/X86/scalar_widen_div.ll +++ b/test/CodeGen/X86/scalar_widen_div.ll @@ -152,3 +152,32 @@ define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) { %rem.r = urem <5 x i64> %num, %rem ret <5 x i64> %rem.r } + +define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) { +; CHECK: idivl +; CHECK: idivl +; CHECK: idivl +; CHECK-NOT: idivl +; CHECK: ret +entry: + %cmp13 = icmp sgt i32 %n, 0 + br i1 %cmp13, label %bb.nph, label %for.end + +bb.nph: + br label %for.body + +for.body: + %i.014 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ] + %arrayidx11 = getelementptr <3 x i32>* %dest, i32 %i.014 + %tmp4 = load <3 x i32>* %arrayidx11 ; <<3 x i32>> [#uses=1] + %arrayidx7 = getelementptr inbounds <3 x i32>* %old, i32 %i.014 + %tmp8 = load <3 x i32>* %arrayidx7 ; <<3 x i32>> [#uses=1] + %div = sdiv <3 x i32> %tmp4, %tmp8 + store <3 x i32> %div, <3 x i32>* %arrayidx11 + %inc = add nsw i32 %i.014, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll index 17ffb5e464..19fbed015b 100644 --- 
a/test/CodeGen/X86/sse-minmax.ll +++ b/test/CodeGen/X86/sse-minmax.ll @@ -1,17 +1,26 @@ ; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s +; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math | FileCheck -check-prefix=UNSAFE %s +; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s ; Some of these patterns can be matched as SSE min or max. Some of ; then can be matched provided that the operands are swapped. ; Some of them can't be matched at all and require a comparison ; and a conditional branch. -; The naming convention is {,x_}{o,u}{gt,lt,ge,le}{,_inverse} +; The naming convention is {,x_,y_}{o,u}{gt,lt,ge,le}{,_inverse} ; x_ : use 0.0 instead of %y +; y_ : use -0.0 instead of %y ; _inverse : swap the arms of the select. ; CHECK: ogt: ; CHECK-NEXT: maxsd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: ogt: +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ogt: +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ogt(double %x, double %y) nounwind { %c = fcmp ogt double %x, %y %d = select i1 %c, double %x, double %y @@ -21,6 +30,12 @@ define double @ogt(double %x, double %y) nounwind { ; CHECK: olt: ; CHECK-NEXT: minsd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: olt: +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: olt: +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @olt(double %x, double %y) nounwind { %c = fcmp olt double %x, %y %d = select i1 %c, double %x, double %y @@ -31,6 +46,14 @@ define double @olt(double %x, double %y) nounwind { ; CHECK-NEXT: minsd %xmm0, %xmm1 ; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: ogt_inverse: +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ogt_inverse: +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ogt_inverse(double %x, double %y) nounwind { 
%c = fcmp ogt double %x, %y %d = select i1 %c, double %y, double %x @@ -41,6 +64,14 @@ define double @ogt_inverse(double %x, double %y) nounwind { ; CHECK-NEXT: maxsd %xmm0, %xmm1 ; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: olt_inverse: +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: olt_inverse: +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @olt_inverse(double %x, double %y) nounwind { %c = fcmp olt double %x, %y %d = select i1 %c, double %y, double %x @@ -49,6 +80,12 @@ define double @olt_inverse(double %x, double %y) nounwind { ; CHECK: oge: ; CHECK-NEXT: ucomisd %xmm1, %xmm0 +; UNSAFE: oge: +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: oge: +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @oge(double %x, double %y) nounwind { %c = fcmp oge double %x, %y %d = select i1 %c, double %x, double %y @@ -57,6 +94,10 @@ define double @oge(double %x, double %y) nounwind { ; CHECK: ole: ; CHECK-NEXT: ucomisd %xmm0, %xmm1 +; UNSAFE: ole: +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; FINITE: ole: +; FINITE-NEXT: minsd %xmm1, %xmm0 define double @ole(double %x, double %y) nounwind { %c = fcmp ole double %x, %y %d = select i1 %c, double %x, double %y @@ -65,6 +106,14 @@ define double @ole(double %x, double %y) nounwind { ; CHECK: oge_inverse: ; CHECK-NEXT: ucomisd %xmm1, %xmm0 +; UNSAFE: oge_inverse: +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: oge_inverse: +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @oge_inverse(double %x, double %y) nounwind { %c = fcmp oge double %x, %y %d = select i1 %c, double %y, double %x @@ -73,6 +122,14 @@ define double @oge_inverse(double %x, double %y) nounwind { ; CHECK: ole_inverse: ; CHECK-NEXT: ucomisd %xmm0, %xmm1 +; UNSAFE: ole_inverse: +; UNSAFE-NEXT: maxsd %xmm0, 
%xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ole_inverse: +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ole_inverse(double %x, double %y) nounwind { %c = fcmp ole double %x, %y %d = select i1 %c, double %y, double %x @@ -83,6 +140,14 @@ define double @ole_inverse(double %x, double %y) nounwind { ; CHECK-NEXT: pxor %xmm1, %xmm1 ; CHECK-NEXT: maxsd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: x_ogt: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ogt: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ogt(double %x) nounwind { %c = fcmp ogt double %x, 0.000000e+00 %d = select i1 %c, double %x, double 0.000000e+00 @@ -93,6 +158,14 @@ define double @x_ogt(double %x) nounwind { ; CHECK-NEXT: pxor %xmm1, %xmm1 ; CHECK-NEXT: minsd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: x_olt: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_olt: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_olt(double %x) nounwind { %c = fcmp olt double %x, 0.000000e+00 %d = select i1 %c, double %x, double 0.000000e+00 @@ -104,6 +177,16 @@ define double @x_olt(double %x) nounwind { ; CHECK-NEXT: minsd %xmm0, %xmm1 ; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: x_ogt_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ogt_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ogt_inverse(double %x) nounwind { %c = fcmp ogt double %x, 0.000000e+00 %d = select i1 %c, double 0.000000e+00, double %x @@ -115,6 +198,16 @@ define double @x_ogt_inverse(double %x) nounwind { ; CHECK-NEXT: maxsd %xmm0, %xmm1 
; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: x_olt_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_olt_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_olt_inverse(double %x) nounwind { %c = fcmp olt double %x, 0.000000e+00 %d = select i1 %c, double 0.000000e+00, double %x @@ -122,9 +215,15 @@ define double @x_olt_inverse(double %x) nounwind { } ; CHECK: x_oge: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: maxsd %xmm1, %xmm0 -; CHECK-NEXT: ret +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: x_oge: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_oge: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_oge(double %x) nounwind { %c = fcmp oge double %x, 0.000000e+00 %d = select i1 %c, double %x, double 0.000000e+00 @@ -132,9 +231,15 @@ define double @x_oge(double %x) nounwind { } ; CHECK: x_ole: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: minsd %xmm1, %xmm0 -; CHECK-NEXT: ret +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: x_ole: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ole: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ole(double %x) nounwind { %c = fcmp ole double %x, 0.000000e+00 %d = select i1 %c, double %x, double 0.000000e+00 @@ -142,10 +247,17 @@ define double @x_ole(double %x) nounwind { } ; CHECK: x_oge_inverse: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: minsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: ret +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: x_oge_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: 
x_oge_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_oge_inverse(double %x) nounwind { %c = fcmp oge double %x, 0.000000e+00 %d = select i1 %c, double 0.000000e+00, double %x @@ -153,10 +265,17 @@ define double @x_oge_inverse(double %x) nounwind { } ; CHECK: x_ole_inverse: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: maxsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: ret +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: x_ole_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ole_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ole_inverse(double %x) nounwind { %c = fcmp ole double %x, 0.000000e+00 %d = select i1 %c, double 0.000000e+00, double %x @@ -164,7 +283,13 @@ define double @x_ole_inverse(double %x) nounwind { } ; CHECK: ugt: -; CHECK-NEXT: ucomisd %xmm0, %xmm1 +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: ugt: +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ugt: +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ugt(double %x, double %y) nounwind { %c = fcmp ugt double %x, %y %d = select i1 %c, double %x, double %y @@ -172,7 +297,13 @@ define double @ugt(double %x, double %y) nounwind { } ; CHECK: ult: -; CHECK-NEXT: ucomisd %xmm1, %xmm0 +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: ult: +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ult: +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ult(double %x, double %y) nounwind { %c = fcmp ult double %x, %y %d = select i1 %c, double %x, double %y @@ -180,7 +311,15 @@ define double @ult(double %x, double %y) nounwind { } ; CHECK: ugt_inverse: -; CHECK-NEXT: ucomisd %xmm0, %xmm1 +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: ugt_inverse: +; 
UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ugt_inverse: +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ugt_inverse(double %x, double %y) nounwind { %c = fcmp ugt double %x, %y %d = select i1 %c, double %y, double %x @@ -188,7 +327,15 @@ define double @ugt_inverse(double %x, double %y) nounwind { } ; CHECK: ult_inverse: -; CHECK-NEXT: ucomisd %xmm1, %xmm0 +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: ult_inverse: +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ult_inverse: +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ult_inverse(double %x, double %y) nounwind { %c = fcmp ult double %x, %y %d = select i1 %c, double %y, double %x @@ -196,9 +343,15 @@ define double @ult_inverse(double %x, double %y) nounwind { } ; CHECK: uge: -; CHECK-NEXT: maxsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: uge: +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: uge: +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @uge(double %x, double %y) nounwind { %c = fcmp uge double %x, %y %d = select i1 %c, double %x, double %y @@ -209,6 +362,12 @@ define double @uge(double %x, double %y) nounwind { ; CHECK-NEXT: minsd %xmm0, %xmm1 ; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: ule: +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ule: +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ule(double %x, double %y) nounwind { %c = fcmp ule double %x, %y %d = select i1 %c, double %x, double %y @@ -218,6 +377,14 @@ define double @ule(double %x, double %y) nounwind { ; CHECK: uge_inverse: ; CHECK-NEXT: minsd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: uge_inverse: +; UNSAFE-NEXT: minsd %xmm0, 
%xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: uge_inverse: +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @uge_inverse(double %x, double %y) nounwind { %c = fcmp uge double %x, %y %d = select i1 %c, double %y, double %x @@ -227,6 +394,14 @@ define double @uge_inverse(double %x, double %y) nounwind { ; CHECK: ule_inverse: ; CHECK-NEXT: maxsd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: ule_inverse: +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ule_inverse: +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @ule_inverse(double %x, double %y) nounwind { %c = fcmp ule double %x, %y %d = select i1 %c, double %y, double %x @@ -234,10 +409,15 @@ define double @ule_inverse(double %x, double %y) nounwind { } ; CHECK: x_ugt: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: maxsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: ret +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: x_ugt: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ugt: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ugt(double %x) nounwind { %c = fcmp ugt double %x, 0.000000e+00 %d = select i1 %c, double %x, double 0.000000e+00 @@ -245,10 +425,15 @@ define double @x_ugt(double %x) nounwind { } ; CHECK: x_ult: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: minsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: ret +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: x_ult: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ult: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ult(double %x) nounwind { %c = fcmp ult double %x, 0.000000e+00 %d = select i1 %c, double %x, double 0.000000e+00 @@ 
-256,9 +441,17 @@ define double @x_ult(double %x) nounwind { } ; CHECK: x_ugt_inverse: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: minsd %xmm1, %xmm0 -; CHECK-NEXT: ret +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: x_ugt_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ugt_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ugt_inverse(double %x) nounwind { %c = fcmp ugt double %x, 0.000000e+00 %d = select i1 %c, double 0.000000e+00, double %x @@ -266,9 +459,17 @@ define double @x_ugt_inverse(double %x) nounwind { } ; CHECK: x_ult_inverse: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: maxsd %xmm1, %xmm0 -; CHECK-NEXT: ret +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: x_ult_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ult_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ult_inverse(double %x) nounwind { %c = fcmp ult double %x, 0.000000e+00 %d = select i1 %c, double 0.000000e+00, double %x @@ -280,6 +481,14 @@ define double @x_ult_inverse(double %x) nounwind { ; CHECK-NEXT: maxsd %xmm0, %xmm1 ; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: x_uge: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_uge: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_uge(double %x) nounwind { %c = fcmp uge double %x, 0.000000e+00 %d = select i1 %c, double %x, double 0.000000e+00 @@ -291,6 +500,14 @@ define double @x_uge(double %x) nounwind { ; CHECK-NEXT: minsd %xmm0, %xmm1 ; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: x_ule: +; UNSAFE-NEXT: pxor %xmm1, 
%xmm1 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ule: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ule(double %x) nounwind { %c = fcmp ule double %x, 0.000000e+00 %d = select i1 %c, double %x, double 0.000000e+00 @@ -301,6 +518,16 @@ define double @x_ule(double %x) nounwind { ; CHECK-NEXT: pxor %xmm1, %xmm1 ; CHECK-NEXT: minsd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: x_uge_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_uge_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_uge_inverse(double %x) nounwind { %c = fcmp uge double %x, 0.000000e+00 %d = select i1 %c, double 0.000000e+00, double %x @@ -311,16 +538,301 @@ define double @x_uge_inverse(double %x) nounwind { ; CHECK-NEXT: pxor %xmm1, %xmm1 ; CHECK-NEXT: maxsd %xmm1, %xmm0 ; CHECK-NEXT: ret +; UNSAFE: x_ule_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ule_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret define double @x_ule_inverse(double %x) nounwind { %c = fcmp ule double %x, 0.000000e+00 %d = select i1 %c, double 0.000000e+00, double %x ret double %d } +; CHECK: y_ogt: +; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_ogt: +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ogt: +; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_ogt(double %x) nounwind { + %c = fcmp ogt double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_olt: +; CHECK-NEXT: minsd {{[^,]*}}, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_olt: +; UNSAFE-NEXT: 
minsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_olt: +; FINITE-NEXT: minsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_olt(double %x) nounwind { + %c = fcmp olt double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_ogt_inverse: +; CHECK-NEXT: movsd {{[^,]*}}, %xmm1 +; CHECK-NEXT: minsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_ogt_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ogt_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_ogt_inverse(double %x) nounwind { + %c = fcmp ogt double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_olt_inverse: +; CHECK-NEXT: movsd {{[^,]*}}, %xmm1 +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_olt_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_olt_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_olt_inverse(double %x) nounwind { + %c = fcmp olt double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_oge: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: y_oge: +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_oge: +; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_oge(double %x) nounwind { + %c = fcmp oge double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_ole: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: y_ole: +; UNSAFE-NEXT: minsd {{[^,]*}}, 
%xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ole: +; FINITE-NEXT: minsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_ole(double %x) nounwind { + %c = fcmp ole double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_oge_inverse: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: y_oge_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_oge_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_oge_inverse(double %x) nounwind { + %c = fcmp oge double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_ole_inverse: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: y_ole_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ole_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_ole_inverse(double %x) nounwind { + %c = fcmp ole double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_ugt: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: y_ugt: +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ugt: +; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_ugt(double %x) nounwind { + %c = fcmp ugt double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_ult: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: y_ult: +; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ult: +; FINITE-NEXT: minsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_ult(double %x) nounwind { + %c = fcmp ult double %x, -0.000000e+00 + %d = select i1 
%c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_ugt_inverse: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: y_ugt_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ugt_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_ugt_inverse(double %x) nounwind { + %c = fcmp ugt double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_ult_inverse: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: y_ult_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ult_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_ult_inverse(double %x) nounwind { + %c = fcmp ult double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_uge: +; CHECK-NEXT: movsd {{[^,]*}}, %xmm1 +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_uge: +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_uge: +; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_uge(double %x) nounwind { + %c = fcmp uge double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_ule: +; CHECK-NEXT: movsd {{[^,]*}}, %xmm1 +; CHECK-NEXT: minsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_ule: +; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ule: +; FINITE-NEXT: minsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_ule(double %x) nounwind { + %c = fcmp ule double %x, -0.000000e+00 + %d = select i1 %c, double %x, 
double -0.000000e+00 + ret double %d +} + +; CHECK: y_uge_inverse: +; CHECK-NEXT: minsd {{[^,]*}}, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_uge_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_uge_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_uge_inverse(double %x) nounwind { + %c = fcmp uge double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_ule_inverse: +; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_ule_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ule_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_ule_inverse(double %x) nounwind { + %c = fcmp ule double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} ; Test a few more misc. cases. 
; CHECK: clampTo3k_a: ; CHECK: minsd +; UNSAFE: clampTo3k_a: +; UNSAFE: minsd +; FINITE: clampTo3k_a: +; FINITE: minsd define double @clampTo3k_a(double %x) nounwind readnone { entry: %0 = fcmp ogt double %x, 3.000000e+03 ; <i1> [#uses=1] @@ -330,6 +842,10 @@ entry: ; CHECK: clampTo3k_b: ; CHECK: minsd +; UNSAFE: clampTo3k_b: +; UNSAFE: minsd +; FINITE: clampTo3k_b: +; FINITE: minsd define double @clampTo3k_b(double %x) nounwind readnone { entry: %0 = fcmp uge double %x, 3.000000e+03 ; <i1> [#uses=1] @@ -339,6 +855,10 @@ entry: ; CHECK: clampTo3k_c: ; CHECK: maxsd +; UNSAFE: clampTo3k_c: +; UNSAFE: maxsd +; FINITE: clampTo3k_c: +; FINITE: maxsd define double @clampTo3k_c(double %x) nounwind readnone { entry: %0 = fcmp olt double %x, 3.000000e+03 ; <i1> [#uses=1] @@ -348,6 +868,10 @@ entry: ; CHECK: clampTo3k_d: ; CHECK: maxsd +; UNSAFE: clampTo3k_d: +; UNSAFE: maxsd +; FINITE: clampTo3k_d: +; FINITE: maxsd define double @clampTo3k_d(double %x) nounwind readnone { entry: %0 = fcmp ule double %x, 3.000000e+03 ; <i1> [#uses=1] @@ -357,6 +881,10 @@ entry: ; CHECK: clampTo3k_e: ; CHECK: maxsd +; UNSAFE: clampTo3k_e: +; UNSAFE: maxsd +; FINITE: clampTo3k_e: +; FINITE: maxsd define double @clampTo3k_e(double %x) nounwind readnone { entry: %0 = fcmp olt double %x, 3.000000e+03 ; <i1> [#uses=1] @@ -366,6 +894,10 @@ entry: ; CHECK: clampTo3k_f: ; CHECK: maxsd +; UNSAFE: clampTo3k_f: +; UNSAFE: maxsd +; FINITE: clampTo3k_f: +; FINITE: maxsd define double @clampTo3k_f(double %x) nounwind readnone { entry: %0 = fcmp ule double %x, 3.000000e+03 ; <i1> [#uses=1] @@ -375,6 +907,10 @@ entry: ; CHECK: clampTo3k_g: ; CHECK: minsd +; UNSAFE: clampTo3k_g: +; UNSAFE: minsd +; FINITE: clampTo3k_g: +; FINITE: minsd define double @clampTo3k_g(double %x) nounwind readnone { entry: %0 = fcmp ogt double %x, 3.000000e+03 ; <i1> [#uses=1] @@ -384,6 +920,10 @@ entry: ; CHECK: clampTo3k_h: ; CHECK: minsd +; UNSAFE: clampTo3k_h: +; UNSAFE: minsd +; FINITE: clampTo3k_h: +; FINITE: minsd define 
double @clampTo3k_h(double %x) nounwind readnone { entry: %0 = fcmp uge double %x, 3.000000e+03 ; <i1> [#uses=1] diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll index b2af7c947d..921161e4a1 100644 --- a/test/CodeGen/X86/sse3.ll +++ b/test/CodeGen/X86/sse3.ll @@ -144,10 +144,9 @@ define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind { store <4 x float> %tmp13, <4 x float>* %r ret void ; X64: t9: -; X64: movsd (%rsi), %xmm0 -; X64: movaps (%rdi), %xmm1 -; X64: movlhps %xmm0, %xmm1 -; X64: movaps %xmm1, (%rdi) +; X64: movaps (%rdi), %xmm0 +; X64: movhps (%rsi), %xmm0 +; X64: movaps %xmm0, (%rdi) ; X64: ret } diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll index cb65e9b50f..e971ef70db 100644 --- a/test/CodeGen/X86/stack-align.ll +++ b/test/CodeGen/X86/stack-align.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -relocation-model=static -mcpu=yonah | grep {andpd.*4(%esp), %xmm} +; RUN: llc < %s -relocation-model=static -realign-stack=1 -mcpu=yonah | FileCheck %s ; The double argument is at 4(esp) which is 16-byte aligned, allowing us to ; fold the load into the andpd. @@ -12,6 +12,7 @@ entry: %tmp = getelementptr { double, double }* %z, i32 0, i32 0 ; <double*> [#uses=1] %tmp1 = load double* %tmp, align 8 ; <double> [#uses=1] %tmp2 = tail call double @fabs( double %tmp1 ) ; <double> [#uses=1] + ; CHECK: andpd{{.*}}4(%esp), %xmm %tmp3 = load double* @G, align 16 ; <double> [#uses=1] %tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1] %tmp6 = fadd double %tmp4, %tmp2 ; <double> [#uses=1] @@ -19,4 +20,20 @@ entry: ret void } +define void @test2() alignstack(16) { +entry: + ; CHECK: andl{{.*}}$-16, %esp + ret void +} + +; Use a call to force a spill. 
+define <2 x double> @test3(<2 x double> %x, <2 x double> %y) alignstack(32) { +entry: + ; CHECK: andl{{.*}}$-32, %esp + call void @test2() + %A = mul <2 x double> %x, %y + ret <2 x double> %A +} + declare double @fabs(double) + diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll index 7d85818d46..42e7a394d8 100644 --- a/test/CodeGen/X86/stack-color-with-reg.ll +++ b/test/CodeGen/X86/stack-color-with-reg.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t -; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 14 +; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 8 type { [62 x %struct.Bitvec*] } ; type %0 type { i8* } ; type %1 diff --git a/test/CodeGen/X86/stdcall.ll b/test/CodeGen/X86/stdcall.ll new file mode 100644 index 0000000000..70204bcf47 --- /dev/null +++ b/test/CodeGen/X86/stdcall.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s | FileCheck %s +; PR5851 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-mingw32" + +%0 = type { void (...)* } + +@B = global %0 { void (...)* bitcast (void ()* @MyFunc to void (...)*) }, align 4 +; CHECK: _B: +; CHECK: .long _MyFunc@0 + +define internal x86_stdcallcc void @MyFunc() nounwind { +entry: + ret void +} diff --git a/test/CodeGen/X86/store_op_load_fold.ll b/test/CodeGen/X86/store_op_load_fold.ll index 66d0e47c6d..6e47eb397d 100644 --- a/test/CodeGen/X86/store_op_load_fold.ll +++ b/test/CodeGen/X86/store_op_load_fold.ll @@ -4,7 +4,7 @@ @X = internal global i16 0 ; <i16*> [#uses=2] -define void @foo() { +define void @foo() nounwind { %tmp.0 = load i16* @X ; <i16> [#uses=1] %tmp.3 = add i16 %tmp.0, 329 ; <i16> [#uses=1] store i16 %tmp.3, i16* @X diff --git 
a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll index 0ccfe470db..46e59e95e5 100644 --- a/test/CodeGen/X86/store_op_load_fold2.ll +++ b/test/CodeGen/X86/store_op_load_fold2.ll @@ -1,21 +1,12 @@ -; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ -; RUN: grep {and DWORD PTR} | count 2 +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s target datalayout = "e-p:32:32" %struct.Macroblock = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 } -define internal fastcc i32 @dct_chroma(i32 %uv, i32 %cr_cbp) { -entry: - br i1 true, label %cond_true2732.preheader, label %cond_true129 -cond_true129: ; preds = %entry - ret i32 0 +define internal fastcc i32 @dct_chroma(i32 %uv, i32 %cr_cbp) nounwind { cond_true2732.preheader: ; preds = %entry %tmp2666 = getelementptr %struct.Macroblock* null, i32 0, i32 13 ; <i64*> [#uses=2] %tmp2674 = trunc i32 0 to i8 ; <i8> [#uses=1] - br i1 true, label %cond_true2732.preheader.split.us, label %cond_true2732.preheader.split -cond_true2732.preheader.split.us: ; preds = %cond_true2732.preheader - br i1 true, label %cond_true2732.outer.us.us, label %cond_true2732.outer.us -cond_true2732.outer.us.us: ; preds = %cond_true2732.preheader.split.us %tmp2667.us.us = load i64* %tmp2666 ; <i64> [#uses=1] %tmp2670.us.us = load i64* null ; <i64> [#uses=1] %shift.upgrd.1 = zext i8 %tmp2674 to i64 ; <i64> [#uses=1] @@ -24,11 +15,10 @@ cond_true2732.outer.us.us: ; preds = %cond_true2732.preheader.split %tmp2676.us.us = and i64 %tmp2667.us.us, %tmp2675not.us.us ; <i64> [#uses=1] store i64 %tmp2676.us.us, i64* %tmp2666 ret i32 0 -cond_true2732.outer.us: ; preds = %cond_true2732.preheader.split.us - ret i32 0 -cond_true2732.preheader.split: ; preds = 
%cond_true2732.preheader - ret i32 0 -cond_next2752: ; No predecessors! - ret i32 0 + +; CHECK: and {{E..}}, DWORD PTR [360] +; CHECK: and DWORD PTR [356], {{E..}} +; CHECK: mov DWORD PTR [360], {{E..}} + } diff --git a/test/CodeGen/X86/tailcall2.ll b/test/CodeGen/X86/tailcall2.ll index 80bab619c1..90315fd2f2 100644 --- a/test/CodeGen/X86/tailcall2.ll +++ b/test/CodeGen/X86/tailcall2.ll @@ -195,3 +195,24 @@ bb2: } declare i32 @foo6(i32, i32, %struct.t* byval align 4) + +; rdar://r7717598 +%struct.ns = type { i32, i32 } +%struct.cp = type { float, float } + +define %struct.ns* @t13(%struct.cp* %yy) nounwind ssp { +; 32: t13: +; 32-NOT: jmp +; 32: call +; 32: ret + +; 64: t13: +; 64-NOT: jmp +; 64: call +; 64: ret +entry: + %0 = tail call fastcc %struct.ns* @foo7(%struct.cp* byval align 4 %yy, i8 signext 0) nounwind + ret %struct.ns* %0 +} + +declare fastcc %struct.ns* @foo7(%struct.cp* byval align 4, i8 signext) nounwind ssp diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll index bfab1aef90..6062084106 100644 --- a/test/CodeGen/X86/trunc-to-bool.ll +++ b/test/CodeGen/X86/trunc-to-bool.ll @@ -3,13 +3,14 @@ ; value and as the operand of a branch. 
; RUN: llc < %s -march=x86 | FileCheck %s -define i1 @test1(i32 %X) zeroext { +define i1 @test1(i32 %X) zeroext nounwind { %Y = trunc i32 %X to i1 ret i1 %Y } +; CHECK: test1: ; CHECK: andl $1, %eax -define i1 @test2(i32 %val, i32 %mask) { +define i1 @test2(i32 %val, i32 %mask) nounwind { entry: %shifted = ashr i32 %val, %mask %anded = and i32 %shifted, 1 @@ -20,9 +21,10 @@ ret_true: ret_false: ret i1 false } -; CHECK: testb $1, %al +; CHECK: test2: +; CHECK: btl %eax -define i32 @test3(i8* %ptr) { +define i32 @test3(i8* %ptr) nounwind { %val = load i8* %ptr %tmp = trunc i8 %val to i1 br i1 %tmp, label %cond_true, label %cond_false @@ -31,9 +33,10 @@ cond_true: cond_false: ret i32 42 } -; CHECK: testb $1, %al +; CHECK: test3: +; CHECK: testb $1, (%eax) -define i32 @test4(i8* %ptr) { +define i32 @test4(i8* %ptr) nounwind { %tmp = ptrtoint i8* %ptr to i1 br i1 %tmp, label %cond_true, label %cond_false cond_true: @@ -41,9 +44,10 @@ cond_true: cond_false: ret i32 42 } -; CHECK: testb $1, %al +; CHECK: test4: +; CHECK: testb $1, 4(%esp) -define i32 @test6(double %d) { +define i32 @test5(double %d) nounwind { %tmp = fptosi double %d to i1 br i1 %tmp, label %cond_true, label %cond_false cond_true: @@ -51,4 +55,5 @@ cond_true: cond_false: ret i32 42 } +; CHECK: test5: ; CHECK: testb $1 diff --git a/test/CodeGen/X86/twoaddr-coalesce.ll b/test/CodeGen/X86/twoaddr-coalesce.ll index d0e13f61f2..4c37225ce0 100644 --- a/test/CodeGen/X86/twoaddr-coalesce.ll +++ b/test/CodeGen/X86/twoaddr-coalesce.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | grep mov | count 5 +; RUN: llc < %s -march=x86 | grep mov | count 4 ; rdar://6523745 @"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] diff --git a/test/CodeGen/X86/use-add-flags.ll b/test/CodeGen/X86/use-add-flags.ll index 2dd2a4adac..c2f0c23fe1 100644 --- a/test/CodeGen/X86/use-add-flags.ll +++ b/test/CodeGen/X86/use-add-flags.ll @@ -5,13 +5,13 @@ ; Use the flags on the add. 
-; CHECK: add_zf: +; CHECK: test1: ; CHECK: addl (%rdi), %esi ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: cmovnsl %ecx, %eax ; CHECK-NEXT: ret -define i32 @add_zf(i32* %x, i32 %y, i32 %a, i32 %b) nounwind { +define i32 @test1(i32* %x, i32 %y, i32 %a, i32 %b) nounwind { %tmp2 = load i32* %x, align 4 ; <i32> [#uses=1] %tmp4 = add i32 %tmp2, %y ; <i32> [#uses=1] %tmp5 = icmp slt i32 %tmp4, 0 ; <i1> [#uses=1] @@ -24,10 +24,10 @@ declare void @foo(i32) ; Don't use the flags result of the and here, since the and has no ; other use. A simple test is better. -; CHECK: bar: +; CHECK: test2: ; CHECK: testb $16, %dil -define void @bar(i32 %x) nounwind { +define void @test2(i32 %x) nounwind { %y = and i32 %x, 16 %t = icmp eq i32 %y, 0 br i1 %t, label %true, label %false @@ -40,11 +40,11 @@ false: ; Do use the flags result of the and here, since the and has another use. -; CHECK: qux: +; CHECK: test3: ; CHECK: andl $16, %edi ; CHECK-NEXT: jne -define void @qux(i32 %x) nounwind { +define void @test3(i32 %x) nounwind { %y = and i32 %x, 16 %t = icmp eq i32 %y, 0 br i1 %t, label %true, label %false diff --git a/test/CodeGen/X86/vec_cast.ll b/test/CodeGen/X86/vec_cast.ll index 1f899b3c20..6f18d13cc9 100644 --- a/test/CodeGen/X86/vec_cast.ll +++ b/test/CodeGen/X86/vec_cast.ll @@ -31,11 +31,10 @@ define <1 x i32> @f(<1 x i16> %a) nounwind { ret <1 x i32> %c } -; TODO: Legalize doesn't yet handle this. 
-;define <8 x i16> @g(<8 x i32> %a) nounwind { -; %c = trunc <8 x i32> %a to <8 x i16> -; ret <8 x i16> %c -;} +define <8 x i16> @g(<8 x i32> %a) nounwind { + %c = trunc <8 x i32> %a to <8 x i16> + ret <8 x i16> %c +} define <3 x i16> @h(<3 x i32> %a) nounwind { %c = trunc <3 x i32> %a to <3 x i16> @@ -46,3 +45,12 @@ define <1 x i16> @i(<1 x i32> %a) nounwind { %c = trunc <1 x i32> %a to <1 x i16> ret <1 x i16> %c } + +; PR6438 +define void @__OpenCL_math_kernel4_kernel() nounwind { + %tmp12.i = and <4 x i32> zeroinitializer, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040> ; <<4 x i32>> [#uses=1] + %cmp13.i = icmp eq <4 x i32> %tmp12.i, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040> ; <<4 x i1>> [#uses=2] + %cmp.ext14.i = sext <4 x i1> %cmp13.i to <4 x i32> ; <<4 x i32>> [#uses=0] + %tmp2110.i = and <4 x i1> %cmp13.i, zeroinitializer ; <<4 x i1>> [#uses=0] + ret void +} diff --git a/test/CodeGen/X86/vec_insert.ll b/test/CodeGen/X86/vec_insert.ll index a7274a9000..4e5d445ff6 100644 --- a/test/CodeGen/X86/vec_insert.ll +++ b/test/CodeGen/X86/vec_insert.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss | count 1 ; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | not grep pinsrw -define void @test(<4 x float>* %F, i32 %I) { +define void @test(<4 x float>* %F, i32 %I) nounwind { %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=1] %f = sitofp i32 %I to float ; <float> [#uses=1] %tmp1 = insertelement <4 x float> %tmp, float %f, i32 0 ; <<4 x float>> [#uses=2] @@ -10,7 +10,7 @@ define void @test(<4 x float>* %F, i32 %I) { ret void } -define void @test2(<4 x float>* %F, i32 %I, float %g) { +define void @test2(<4 x float>* %F, i32 %I, float %g) nounwind { %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=1] %f = sitofp i32 %I to float ; <float> [#uses=1] %tmp1 = insertelement <4 x float> %tmp, float %f, i32 2 ; <<4 x float>> [#uses=1] diff --git a/test/CodeGen/X86/vec_shuffle-36.ll 
b/test/CodeGen/X86/vec_shuffle-36.ll index 8a93a7eeee..1ea37c881e 100644 --- a/test/CodeGen/X86/vec_shuffle-36.ll +++ b/test/CodeGen/X86/vec_shuffle-36.ll @@ -1,9 +1,16 @@ -; RUN: llc < %s -march=x86 -mattr=sse41 -o %t -; RUN: grep pshufb %t | count 1 - +; RUN: llc < %s -march=x86-64 -mattr=sse41 | FileCheck %s define <8 x i16> @shuf6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +; CHECK: pshufb +; CHECK-NOT: pshufb +; CHECK: ret entry: - %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 3, i32 2, i32 0, i32 2, i32 1, i32 5, i32 6 , i32 undef > - ret <8 x i16> %tmp9 + %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 3, i32 2, i32 0, i32 2, i32 1, i32 5, i32 6 , i32 undef > + ret <8 x i16> %tmp9 } + +define <8 x i16> @shuf7(<8 x i16> %t0) { +; CHECK: pshufd + %tmp10 = shufflevector <8 x i16> %t0, <8 x i16> undef, <8 x i32> < i32 undef, i32 2, i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef > + ret <8 x i16> %tmp10 +}
\ No newline at end of file diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll index b1613fb3a3..c8b2927b71 100644 --- a/test/CodeGen/X86/vec_ss_load_fold.ll +++ b/test/CodeGen/X86/vec_ss_load_fold.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 -o %t -; RUN: grep minss %t | grep CPI | count 2 -; RUN: grep CPI %t | not grep movss +; RUN: llc < %s -march=x86 -mattr=+sse,+sse2,+sse41 | FileCheck %s target datalayout = "e-p:32:32" target triple = "i686-apple-darwin8.7.2" @@ -17,6 +15,10 @@ define i16 @test1(float %f) nounwind { %tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1] %tmp69 = trunc i32 %tmp.upgrd.1 to i16 ; <i16> [#uses=1] ret i16 %tmp69 +; CHECK: test1: +; CHECK: subss LCPI1_ +; CHECK: mulss LCPI1_ +; CHECK: minss LCPI1_ } define i16 @test2(float %f) nounwind { @@ -28,6 +30,10 @@ define i16 @test2(float %f) nounwind { %tmp = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1] %tmp69 = trunc i32 %tmp to i16 ; <i16> [#uses=1] ret i16 %tmp69 +; CHECK: test2: +; CHECK: addss LCPI2_ +; CHECK: mulss LCPI2_ +; CHECK: minss LCPI2_ } declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) @@ -39,3 +45,28 @@ declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) + + +declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) +declare <4 x float> @f() + +define <4 x float> @test3(<4 x float> %A, float *%b, i32 %C) nounwind { + %a = load float *%b + %B = insertelement <4 x float> undef, float %a, i32 0 + %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %A, <4 x float> %B, i32 4) + ret <4 x float> %X +; CHECK: test3: +; CHECK: roundss $4, (%eax), %xmm0 +} + +define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind { + %a = load float *%b + %B = insertelement <4 x float> 
undef, float %a, i32 0 + %q = call <4 x float> @f() + %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %q, <4 x float> %B, i32 4) + ret <4 x float> %X +; CHECK: test4: +; CHECK: movss (%eax), %xmm +; CHECK: call +; CHECK: roundss $4, %xmm{{.*}}, %xmm0 +} diff --git a/test/CodeGen/X86/xor-icmp.ll b/test/CodeGen/X86/xor-icmp.ll index a6bdb13ec6..2d75c5d762 100644 --- a/test/CodeGen/X86/xor-icmp.ll +++ b/test/CodeGen/X86/xor-icmp.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 ; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64 +; rdar://7367229 define i32 @t(i32 %a, i32 %b) nounwind ssp { entry: @@ -34,3 +35,33 @@ bb1: ; preds = %entry declare i32 @foo(...) declare i32 @bar(...) + +define i32 @t2(i32 %x, i32 %y) nounwind ssp { +; X32: t2: +; X32: cmpl +; X32: sete +; X32: cmpl +; X32: sete +; X32-NOT: xor +; X32: je + +; X64: t2: +; X64: testl +; X64: sete +; X64: testl +; X64: sete +; X64-NOT: xor +; X64: je +entry: + %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1] + %1 = icmp eq i32 %y, 0 ; <i1> [#uses=1] + %2 = xor i1 %1, %0 ; <i1> [#uses=1] + br i1 %2, label %bb, label %return + +bb: ; preds = %entry + %3 = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=0] + ret i32 undef + +return: ; preds = %entry + ret i32 undef +} diff --git a/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll b/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll new file mode 100644 index 0000000000..6ad9a73899 --- /dev/null +++ b/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=xcore +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "xcore-xmos-elf" + +%0 = type { i32 } +%struct.dwarf_fde = type <{ i32, i32, [0 x i8] }> +%struct.object = type { i8*, i8*, i8*, %union.anon, %0, %struct.object* } +%union.anon = type { %struct.dwarf_fde* } + +define %struct.dwarf_fde* @search_object(%struct.object* %ob, i8* %pc) { +entry: + br 
i1 undef, label %bb3.i15.i.i, label %bb2 + +bb3.i15.i.i: ; preds = %bb3.i15.i.i, %entry + %indvar.i.i.i = phi i32 [ %indvar.next.i.i.i, %bb3.i15.i.i ], [ 0, %entry ] ; <i32> [#uses=2] + %tmp137 = sub i32 0, %indvar.i.i.i ; <i32> [#uses=1] + %scevgep13.i.i.i = getelementptr i32* undef, i32 %tmp137 ; <i32*> [#uses=2] + %scevgep1314.i.i.i = bitcast i32* %scevgep13.i.i.i to %struct.dwarf_fde** ; <%struct.dwarf_fde**> [#uses=1] + %0 = load %struct.dwarf_fde** %scevgep1314.i.i.i, align 4 ; <%struct.dwarf_fde*> [#uses=0] + store i32 undef, i32* %scevgep13.i.i.i + %indvar.next.i.i.i = add i32 %indvar.i.i.i, 1 ; <i32> [#uses=1] + br label %bb3.i15.i.i + +bb2: ; preds = %entry + ret %struct.dwarf_fde* undef +} diff --git a/test/CodeGen/XCore/switch.ll b/test/CodeGen/XCore/switch.ll new file mode 100644 index 0000000000..9cc27f2ffa --- /dev/null +++ b/test/CodeGen/XCore/switch.ll @@ -0,0 +1,24 @@ +; RUN: llc -march=xcore < %s | FileCheck %s + +define i32 @switch(i32 %i) { +entry: + switch i32 %i, label %default [ + i32 0, label %bb0 + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + ] +; CHECK-NOT: shl +; CHECK: bru +; CHECK: .jmptable +bb0: + ret i32 0 +bb1: + ret i32 1 +bb2: + ret i32 2 +bb3: + ret i32 3 +default: + ret i32 4 +} diff --git a/test/CodeGen/XCore/switch_long.ll b/test/CodeGen/XCore/switch_long.ll new file mode 100644 index 0000000000..30c9e3db73 --- /dev/null +++ b/test/CodeGen/XCore/switch_long.ll @@ -0,0 +1,132 @@ +; RUN: llc -march=xcore < %s | FileCheck %s + +define i32 @switch(i32 %i) { +entry: + switch i32 %i, label %default [ + i32 0, label %bb0 + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + i32 5, label %bb5 + i32 6, label %bb6 + i32 7, label %bb7 + i32 8, label %bb8 + i32 9, label %bb9 + i32 10, label %bb10 + i32 11, label %bb11 + i32 12, label %bb12 + i32 13, label %bb13 + i32 14, label %bb14 + i32 15, label %bb15 + i32 16, label %bb16 + i32 17, label %bb17 + i32 18, label %bb18 + i32 19, label %bb19 + 
i32 20, label %bb20 + i32 21, label %bb21 + i32 22, label %bb22 + i32 23, label %bb23 + i32 24, label %bb24 + i32 25, label %bb25 + i32 26, label %bb26 + i32 27, label %bb27 + i32 28, label %bb28 + i32 29, label %bb29 + i32 30, label %bb30 + i32 31, label %bb31 + i32 32, label %bb32 + i32 33, label %bb33 + i32 34, label %bb34 + i32 35, label %bb35 + i32 36, label %bb36 + i32 37, label %bb37 + i32 38, label %bb38 + i32 39, label %bb39 + ] +; CHECK: shl +; CHECK: bru +; CHECK: .jmptable +bb0: + ret i32 0 +bb1: + ret i32 1 +bb2: + ret i32 2 +bb3: + ret i32 3 +bb4: + ret i32 4 +bb5: + ret i32 5 +bb6: + ret i32 6 +bb7: + ret i32 7 +bb8: + ret i32 8 +bb9: + ret i32 9 +bb10: + ret i32 0 +bb11: + ret i32 1 +bb12: + ret i32 2 +bb13: + ret i32 3 +bb14: + ret i32 4 +bb15: + ret i32 5 +bb16: + ret i32 6 +bb17: + ret i32 7 +bb18: + ret i32 8 +bb19: + ret i32 9 +bb20: + ret i32 0 +bb21: + ret i32 1 +bb22: + ret i32 2 +bb23: + ret i32 3 +bb24: + ret i32 4 +bb25: + ret i32 5 +bb26: + ret i32 6 +bb27: + ret i32 7 +bb28: + ret i32 8 +bb29: + ret i32 9 +bb30: + ret i32 0 +bb31: + ret i32 1 +bb32: + ret i32 2 +bb33: + ret i32 3 +bb34: + ret i32 4 +bb35: + ret i32 5 +bb36: + ret i32 6 +bb37: + ret i32 7 +bb38: + ret i32 8 +bb39: + ret i32 9 +default: + ret i32 0 +} |