aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp15
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp5
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp1
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldm.ll40
4 files changed, 53 insertions, 8 deletions
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index c6afbe9336..a81b790f9d 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -615,12 +615,15 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
return false;
bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
- unsigned Offset = isAM5
- ? ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
- true, isDPR ? 2 : 1)
- : (isAM2
- ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
- : Bytes);
+ unsigned Offset = 0;
+ if (isAM5)
+ Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
+ ? ARM_AM::db
+ : ARM_AM::ia, true, (isDPR ? 2 : 1));
+ else if (isAM2)
+ Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ else
+ Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
if (isLd) {
if (isAM5)
// FLDMS, FLDMD
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 918ca7806b..a6e987b086 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -101,8 +101,9 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
- if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
+ // FIXME: temporarily disabling load / store optimization pass for Thumb1 mode.
+ if (OptLevel != CodeGenOpt::None && !DisableLdStOpti &&
+ !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass());
if (OptLevel != CodeGenOpt::None &&
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 4438a8c74c..a326185c88 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -599,6 +599,7 @@ void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op,
// FIXME
bool isLDM = (MI->getOpcode() == ARM::LDM ||
MI->getOpcode() == ARM::LDM_RET ||
+ MI->getOpcode() == ARM::t2LDM ||
MI->getOpcode() == ARM::t2LDM_RET);
O << ARM_AM::getAMSubModeAltStr(Mode, isLDM);
} else
diff --git a/test/CodeGen/Thumb2/thumb2-ldm.ll b/test/CodeGen/Thumb2/thumb2-ldm.ll
new file mode 100644
index 0000000000..c90722ccfd
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -0,0 +1,40 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | FileCheck %s
+
+@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
+
+define i32 @t1() {
+; CHECK: t1:
+; CHECK: stmfd sp!, {r7, lr}
+; CHECK: ldmfd sp!, {r7, pc}
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
+ ret i32 %tmp4
+}
+
+define i32 @t2() {
+; CHECK: t2:
+; CHECK: stmfd sp!, {r7, lr}
+; CHECK: ldmia
+; CHECK: ldmfd sp!, {r7, pc}
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
+ %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+define i32 @t3() {
+; CHECK: t3:
+; CHECK: stmfd sp!, {r7, lr}
+; CHECK: ldmfd sp!, {r7, pc}
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+declare i32 @f1(i32, i32)
+
+declare i32 @f2(i32, i32, i32)