Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--  lib/Target/ARM/ARM.td | 10
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 17
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 2
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 16
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 4
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 3
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 209
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 6
-rw-r--r--  lib/Target/ARM/ARMHazardRecognizer.cpp | 10
-rw-r--r--  lib/Target/ARM/ARMHazardRecognizer.h | 13
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 84
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 542
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 26
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 30
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 184
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 91
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 146
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 177
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 46
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 17
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.cpp | 5
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.h | 6
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 81
-rw-r--r--  lib/Target/ARM/ARMSchedule.td | 18
-rw-r--r--  lib/Target/ARM/ARMScheduleA9.td | 57
-rw-r--r--  lib/Target/ARM/ARMScheduleSwift.td | 933
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 52
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 11
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 2
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 383
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 314
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 12
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 1
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h | 12
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 15
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 43
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 172
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 28
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp | 43
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp | 157
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h | 73
-rw-r--r--  lib/Target/ARM/MCTargetDesc/CMakeLists.txt | 1
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 6
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 12
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.h | 2
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 8
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 8
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.cpp | 6
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.h | 6
56 files changed, 3080 insertions, 1044 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 2d7470919d..1bc9d6b410 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -38,7 +38,8 @@ def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
"Enable Thumb2 instructions">;
def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
- "Does not support ARM mode execution">;
+ "Does not support ARM mode execution",
+ [ModeThumb]>;
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision floating point">;
def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
@@ -59,6 +60,8 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
"FP compare + branch is slow">;
def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
"Floating point unit supports single precision only">;
+def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
+ "Enable support for Performance Monitor extensions">;
def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
"Enable support for TrustZone security extensions">;
@@ -134,7 +137,7 @@ def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
[HasV6Ops, FeatureThumb2]>;
def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
"Support ARM v7 instructions",
- [HasV6T2Ops]>;
+ [HasV6T2Ops, FeaturePerfMon]>;
//===----------------------------------------------------------------------===//
// ARM Processors supported.
@@ -175,7 +178,8 @@ def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
FeatureTrustZone]>;
def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
"Cortex-R5 ARM processors",
- [FeatureSlowFPBrcc, FeatureHWDivARM,
+ [FeatureSlowFPBrcc,
+ FeatureHWDiv, FeatureHWDivARM,
FeatureHasSlowFPVMLx,
FeatureAvoidPartialCPSR,
FeatureT2XtPk]>;
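The net effect of the ARM.td hunks above is to grow two implied-features lists: FeatureNoARM now implies ModeThumb, and HasV7Ops pulls in the new FeaturePerfMon, while Cortex-R5 gains FeatureHWDiv alongside FeatureHWDivARM. A minimal C++ sketch of how such an implied list closes transitively over its dependencies (hypothetical names, not TableGen's generated code):

#include <map>
#include <set>
#include <string>
#include <vector>

using Feature = std::string;

// Enable F plus everything F transitively implies.
static void addWithImplied(const std::map<Feature, std::vector<Feature>> &Implies,
                           const Feature &F, std::set<Feature> &Enabled) {
  if (!Enabled.insert(F).second)
    return; // already enabled
  auto It = Implies.find(F);
  if (It == Implies.end())
    return;
  for (const Feature &Dep : It->second)
    addWithImplied(Implies, Dep, Enabled);
}

int main() {
  std::map<Feature, std::vector<Feature>> Implies = {
      {"noarm", {"thumb-mode"}}, {"v7", {"v6t2", "perfmon"}}};
  std::set<Feature> Enabled;
  addWithImplied(Implies, "v7", Enabled); // also enables v6t2 and perfmon
  return Enabled.count("perfmon") ? 0 : 1;
}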
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 0d1417dd17..ad1447503f 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -113,8 +113,7 @@ ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
if (Subtarget.isThumb2() || Subtarget.hasVFP2())
- return (ScheduleHazardRecognizer *)
- new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
+ return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
@@ -283,14 +282,20 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
return false;
--I;
}
- if (!isUnpredicatedTerminator(I))
- return false;
// Get the last instruction in the block.
MachineInstr *LastInst = I;
+ unsigned LastOpc = LastInst->getOpcode();
+
+ // Check if it's an indirect branch first, this should return 'unanalyzable'
+ // even if it's predicated.
+ if (isIndirectBranchOpcode(LastOpc))
+ return true;
+
+ if (!isUnpredicatedTerminator(I))
+ return false;
// If there is only one terminator instruction, process it.
- unsigned LastOpc = LastInst->getOpcode();
if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
if (isUncondBranchOpcode(LastOpc)) {
TBB = LastInst->getOperand(0).getMBB();
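The reordering in AnalyzeBranch matters because an indirect branch must be reported as unanalyzable (by returning true) even when it is predicated; testing isUnpredicatedTerminator first would let a predicated indirect branch fall through to the "nothing to analyze" path. A standalone sketch of the new control flow, with stand-in types rather than LLVM's:

#include <cassert>

enum Opcode { UncondBr, CondBr, IndirectBr };

// Returns true when the branch cannot be analyzed (LLVM's convention).
static bool analyzeBranch(Opcode LastOpc, bool IsPredicated) {
  // Indirect branches are unanalyzable even if predicated, so this check
  // must come before the predication check.
  if (LastOpc == IndirectBr)
    return true;
  if (IsPredicated)
    return false; // predicated terminator: nothing more to analyze
  return false;   // direct branches would be analyzed further (elided)
}

int main() {
  assert(analyzeBranch(IndirectBr, /*IsPredicated=*/true));
  assert(!analyzeBranch(UncondBr, /*IsPredicated=*/false));
}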
@@ -4146,6 +4151,8 @@ bool ARMBaseInstrInfo::hasNOP() const {
}
bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
+ if (MI->getNumOperands() < 4)
+ return true;
unsigned ShOpVal = MI->getOperand(3).getImm();
unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
// Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
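isSwiftFastImmShift now guards against instructions with fewer than four operands before reading operand 3 as a shifter operand. A hedged, self-contained sketch of the fast-shift test it then performs, with a plain enum standing in for ARM_AM's shifter-operand encoding:

#include <cassert>

enum ShiftOp { LSL, LSR, ASR, ROR };

// Swift executes lsl #1, lsl #2 and lsr #1 with no extra latency.
static bool isFastImmShift(ShiftOp Op, unsigned Imm) {
  return (Op == LSL && (Imm == 1 || Imm == 2)) || (Op == LSR && Imm == 1);
}

int main() {
  assert(isFastImmShift(LSL, 2));
  assert(!isFastImmShift(ASR, 1));
}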
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 2ef659c23b..4ca3d7b411 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -46,7 +46,7 @@ public:
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const;
- virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
+ virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0;
const ARMSubtarget &getSubtarget() const { return Subtarget; }
ScheduleHazardRecognizer *
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index b0d34a76b0..7c03055808 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -43,9 +43,8 @@
using namespace llvm;
-ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
- const ARMSubtarget &sti)
- : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), TII(tii), STI(sti),
+ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti),
FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
BasePtr(ARM::R6) {
}
@@ -94,6 +93,7 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(ARM::SP);
Reserved.set(ARM::PC);
Reserved.set(ARM::FPSCR);
+ Reserved.set(ARM::APSR_NZCV);
if (TFI->hasFP(MF))
Reserved.set(FramePtr);
if (hasBasePointer(MF))
@@ -375,6 +375,7 @@ emitLoadConstPool(MachineBasicBlock &MBB,
ARMCC::CondCodes Pred,
unsigned PredReg, unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C =
ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val);
@@ -556,9 +557,10 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
if (Ins != MBB->end())
DL = Ins->getDebugLoc();
- const MCInstrDesc &MCID = TII.get(ADDriOpc);
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const MCInstrDesc &MCID = TII.get(ADDriOpc);
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg)
@@ -574,6 +576,8 @@ ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
MachineInstr &MI = *I;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
int Off = Offset; // ARM doesn't need the general 64-bit offsets
unsigned i = 0;
@@ -671,6 +675,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
const ARMFrameLowering *TFI =
static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 0679919152..03b3682541 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -74,7 +74,6 @@ static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
protected:
- const ARMBaseInstrInfo &TII;
const ARMSubtarget &STI;
/// FramePtr - ARM physical register used as frame ptr.
@@ -86,8 +85,7 @@ protected:
unsigned BasePtr;
// Can be only subclassed.
- explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
- const ARMSubtarget &STI);
+ explicit ARMBaseRegisterInfo(const ARMSubtarget &STI);
// Return the opcode that implements 'Op', or 0 if no opcode
unsigned getOpcode(int Op) const;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 95decfe7d3..4a157d7430 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -1137,8 +1137,8 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
return;
} else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) {
uint32_t v = ~MI.getOperand(2).getImm();
- int32_t lsb = CountTrailingZeros_32(v);
- int32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
+ int32_t lsb = countTrailingZeros(v);
+ int32_t msb = (32 - countLeadingZeros(v)) - 1;
// Instr{20-16} = msb, Instr{11-7} = lsb
Binary |= (msb & 0x1F) << 16;
Binary |= (lsb & 0x1F) << 7;
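The BFC/BFI hunk is a pure rename from CountTrailingZeros_32/CountLeadingZeros_32 to the newer countTrailingZeros/countLeadingZeros; the field extraction itself is unchanged. A worked example of the lsb/msb computation, with portable loops standing in for the LLVM helpers:

#include <cassert>
#include <cstdint>

static int ctz32(uint32_t v) { int n = 0; while (!(v & 1u)) { v >>= 1; ++n; } return n; }
static int clz32(uint32_t v) { int n = 0; for (uint32_t m = 0x80000000u; !(v & m); m >>= 1) ++n; return n; }

int main() {
  // For BFC/BFI, the inverse of the immediate describes the bitfield.
  // Suppose the field occupies bits 8..15:
  uint32_t v = ~0xFFFF00FFu;     // == 0x0000FF00
  int lsb = ctz32(v);            // 8
  int msb = (32 - clz32(v)) - 1; // 15
  assert(lsb == 8 && msb == 15);
}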
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 4891609b33..cff5ce27bc 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -128,7 +128,7 @@ namespace {
// If the block size isn't a multiple of the known bits, assume the
// worst case padding.
if (Size & ((1u << Bits) - 1))
- Bits = CountTrailingZeros_32(Size);
+ Bits = countTrailingZeros(Size);
return Bits;
}
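In the constant-island hunk, Bits is the number of low bits known to be zero in the block offset; when the block size isn't a multiple of that alignment, only countTrailingZeros(Size) bits of alignment survive for whatever follows. A small worked check of that logic:

#include <cassert>

static unsigned ctz(unsigned v) { unsigned n = 0; while (!(v & 1u)) { v >>= 1; ++n; } return n; }

static unsigned knownBits(unsigned Size, unsigned Bits) {
  // If the block size isn't a multiple of the known bits, assume the
  // worst-case padding.
  if (Size & ((1u << Bits) - 1))
    Bits = ctz(Size);
  return Bits;
}

int main() {
  assert(knownBits(8, 3) == 3); // size 8 keeps 8-byte alignment
  assert(knownBits(6, 3) == 1); // size 6 leaves only 2-byte alignment
}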
@@ -753,6 +753,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
Scale = 4;
break;
+ case ARM::LDRBi12:
case ARM::LDRi12:
case ARM::LDRcp:
case ARM::t2LDRpci:
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 5d45f64912..a4de941689 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -20,6 +20,7 @@
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -41,6 +42,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -1025,7 +1027,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
useAM3 = true;
}
}
- RC = &ARM::GPRRegClass;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
break;
case MVT::i16:
if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
@@ -1040,7 +1042,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
useAM3 = true;
}
- RC = &ARM::GPRRegClass;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
break;
case MVT::i32:
if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
@@ -1054,7 +1056,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
} else {
Opc = ARM::LDRi12;
}
- RC = &ARM::GPRRegClass;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
break;
case MVT::f32:
if (!Subtarget->hasVFP2()) return false;
@@ -1063,7 +1065,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
needVMOV = true;
VT = MVT::i32;
Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
- RC = &ARM::GPRRegClass;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
} else {
Opc = ARM::VLDRS;
RC = TLI.getRegClassFor(VT);
@@ -1802,7 +1804,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
unsigned SrcReg2 = getRegForValue(I->getOperand(1));
if (SrcReg2 == 0) return false;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addReg(SrcReg1).addReg(SrcReg2));
@@ -1985,7 +1987,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
case CCValAssign::ZExt: {
MVT DestVT = VA.getLocVT();
Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
- assert (Arg != 0 && "Failed to emit a sext");
+ assert (Arg != 0 && "Failed to emit a zext");
ArgVT = DestVT;
break;
}
@@ -2602,47 +2604,112 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
bool isZExt) {
if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
return 0;
+ if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
+ return 0;
- unsigned Opc;
- bool isBoolZext = false;
- const TargetRegisterClass *RC;
- switch (SrcVT.SimpleTy) {
- default: return 0;
- case MVT::i16:
- if (!Subtarget->hasV6Ops()) return 0;
- RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
- if (isZExt)
- Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
- else
- Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
- break;
- case MVT::i8:
- if (!Subtarget->hasV6Ops()) return 0;
- RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
- if (isZExt)
- Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
- else
- Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
- break;
- case MVT::i1:
- if (isZExt) {
- RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
- Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
- isBoolZext = true;
- break;
+ // Table of which combinations can be emitted as a single instruction,
+ // and which will require two.
+ static const uint8_t isSingleInstrTbl[3][2][2][2] = {
+ // ARM Thumb
+ // !hasV6Ops hasV6Ops !hasV6Ops hasV6Ops
+ // ext: s z s z s z s z
+ /* 1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
+ /* 8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
+ /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
+ };
+
+ // Target register constraints:
+ // - For ARM, the target can never be PC.
+ // - For 16-bit Thumb, it is restricted to the lower 8 registers.
+ // - For 32-bit Thumb, it is restricted to non-SP and non-PC registers.
+ static const TargetRegisterClass *RCTbl[2][2] = {
+ // Instructions: Two Single
+ /* ARM */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
+ /* Thumb */ { &ARM::tGPRRegClass, &ARM::rGPRRegClass }
+ };
+
+ // Table governing the instruction(s) to be emitted.
+ static const struct {
+ // First entry for each of the following is sext, second zext.
+ uint16_t Opc[2];
+ uint8_t Imm[2]; // All instructions have either a shift or a mask.
+ uint8_t hasS[2]; // Some instructions have an S bit, always set it to 0.
+ } OpcTbl[2][2][3] = {
+ { // Two instructions (first is left shift, second is in this table).
+ { // ARM
+ /* 1 */ { { ARM::ASRi, ARM::LSRi }, { 31, 31 }, { 1, 1 } },
+ /* 8 */ { { ARM::ASRi, ARM::LSRi }, { 24, 24 }, { 1, 1 } },
+ /* 16 */ { { ARM::ASRi, ARM::LSRi }, { 16, 16 }, { 1, 1 } }
+ },
+ { // Thumb
+ /* 1 */ { { ARM::tASRri, ARM::tLSRri }, { 31, 31 }, { 0, 0 } },
+ /* 8 */ { { ARM::tASRri, ARM::tLSRri }, { 24, 24 }, { 0, 0 } },
+ /* 16 */ { { ARM::tASRri, ARM::tLSRri }, { 16, 16 }, { 0, 0 } }
+ }
+ },
+ { // Single instruction.
+ { // ARM
+ /* 1 */ { { ARM::KILL, ARM::ANDri }, { 0, 1 }, { 0, 1 } },
+ /* 8 */ { { ARM::SXTB, ARM::ANDri }, { 0, 255 }, { 0, 1 } },
+ /* 16 */ { { ARM::SXTH, ARM::UXTH }, { 0, 0 }, { 0, 0 } }
+ },
+ { // Thumb
+ /* 1 */ { { ARM::KILL, ARM::t2ANDri }, { 0, 1 }, { 0, 1 } },
+ /* 8 */ { { ARM::t2SXTB, ARM::t2ANDri }, { 0, 255 }, { 0, 1 } },
+ /* 16 */ { { ARM::t2SXTH, ARM::t2UXTH }, { 0, 0 }, { 0, 0 } }
+ }
}
- return 0;
+ };
+
+ unsigned SrcBits = SrcVT.getSizeInBits();
+ unsigned DestBits = DestVT.getSizeInBits();
+ (void) DestBits;
+ assert((SrcBits < DestBits) && "can only extend to larger types");
+ assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
+ "other sizes unimplemented");
+ assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
+ "other sizes unimplemented");
+
+ bool hasV6Ops = Subtarget->hasV6Ops();
+ unsigned Bitness = countTrailingZeros(SrcBits) >> 1; // {1,8,16}=>{0,1,2}
+ assert((Bitness < 3) && "sanity-check table bounds");
+
+ bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
+ const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
+ unsigned Opc = OpcTbl[isSingleInstr][isThumb2][Bitness].Opc[isZExt];
+ assert(ARM::KILL != Opc && "Invalid table entry");
+ unsigned Imm = OpcTbl[isSingleInstr][isThumb2][Bitness].Imm[isZExt];
+ unsigned hasS = OpcTbl[isSingleInstr][isThumb2][Bitness].hasS[isZExt];
+
+ // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
+ bool setsCPSR = &ARM::tGPRRegClass == RC;
+ unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::LSLi;
+ unsigned ResultReg;
+
+ // Either one or two instructions are emitted.
+ // They're always of the form:
+ // dst = in OP imm
+ // CPSR is set only by 16-bit Thumb instructions.
+ // Predicate, if any, is AL.
+ // S bit, if available, is always 0.
+ // When two are emitted, the first's result feeds the second's input,
+ // and that intermediate value is then dead.
+ unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
+ for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
+ ResultReg = createResultReg(RC);
+ unsigned Opcode = ((0 == Instr) && !isSingleInstr) ? LSLOpc : Opc;
+ bool isKill = 1 == Instr;
+ MachineInstrBuilder MIB = BuildMI(
+ *FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opcode), ResultReg);
+ if (setsCPSR)
+ MIB.addReg(ARM::CPSR, RegState::Define);
+ AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(Imm));
+ if (hasS)
+ AddDefaultCC(MIB);
+ // Second instruction consumes the first's result.
+ SrcReg = ResultReg;
}
- unsigned ResultReg = createResultReg(RC);
- MachineInstrBuilder MIB;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
- .addReg(SrcReg);
- if (isBoolZext)
- MIB.addImm(1);
- else
- MIB.addImm(0);
- AddOptionalDefs(MIB);
return ResultReg;
}
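The table indexing in the rewritten ARMEmitIntExt leans on a small trick: countTrailingZeros maps the legal source widths {1, 8, 16} to {0, 3, 4}, and a right shift by one collapses those to the row indices {0, 1, 2}. A quick check of that mapping:

#include <cassert>

static unsigned ctz(unsigned v) { unsigned n = 0; while (!(v & 1u)) { v >>= 1; ++n; } return n; }

int main() {
  // SrcBits -> table row: {1, 8, 16} => {0, 1, 2}
  assert((ctz(1u) >> 1) == 0);  // ctz(1)  == 0
  assert((ctz(8u) >> 1) == 1);  // ctz(8)  == 3
  assert((ctz(16u) >> 1) == 2); // ctz(16) == 4
}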
@@ -2707,7 +2774,7 @@ bool ARMFastISel::SelectShift(const Instruction *I,
if (Reg2 == 0) return false;
}
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
if(ResultReg == 0) return false;
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -2797,6 +2864,25 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
return false;
}
+namespace {
+// This table describes sign- and zero-extend instructions which can be
+// folded into a preceding load. All of these extends have an immediate
+// (sometimes a mask and sometimes a shift) that's applied after
+// extension.
+const struct FoldableLoadExtendsStruct {
+ uint16_t Opc[2]; // ARM, Thumb.
+ uint8_t ExpectedImm;
+ uint8_t isZExt : 1;
+ uint8_t ExpectedVT : 7;
+} FoldableLoadExtends[] = {
+ { { ARM::SXTH, ARM::t2SXTH }, 0, 0, MVT::i16 },
+ { { ARM::UXTH, ARM::t2UXTH }, 0, 1, MVT::i16 },
+ { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8 },
+ { { ARM::SXTB, ARM::t2SXTB }, 0, 0, MVT::i8 },
+ { { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 }
+};
+}
+
/// \brief The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
@@ -2812,26 +2898,23 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
// ldrb r1, [r0] ldrb r1, [r0]
// uxtb r2, r1 =>
// mov r3, r2 mov r3, r1
- bool isZExt = true;
- switch(MI->getOpcode()) {
- default: return false;
- case ARM::SXTH:
- case ARM::t2SXTH:
- isZExt = false;
- case ARM::UXTH:
- case ARM::t2UXTH:
- if (VT != MVT::i16)
- return false;
- break;
- case ARM::SXTB:
- case ARM::t2SXTB:
- isZExt = false;
- case ARM::UXTB:
- case ARM::t2UXTB:
- if (VT != MVT::i8)
- return false;
- break;
+ if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
+ return false;
+ const uint64_t Imm = MI->getOperand(2).getImm();
+
+ bool Found = false;
+ bool isZExt;
+ for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends);
+ i != e; ++i) {
+ if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() &&
+ (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm &&
+ MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) {
+ Found = true;
+ isZExt = FoldableLoadExtends[i].isZExt;
+ }
}
+ if (!Found) return false;
+
// See if we can handle this address.
Address Addr;
if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
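The rewrite of tryToFoldLoadIntoMI replaces an opcode switch with a scan of the FoldableLoadExtends table, matching the extend's opcode (per ISA column), its immediate, and the loaded type in one pass. A minimal standalone sketch of the same table-scan pattern, with made-up opcode numbers rather than the real ARM/Thumb2 ones:

#include <cassert>
#include <cstdint>

struct FoldableExtend {
  uint16_t Opc[2];     // [0] = ARM opcode, [1] = Thumb2 opcode
  uint8_t ExpectedImm; // mask or shift the extend must carry
  bool IsZExt;
  int ExpectedBits;    // width of the loaded value
};

static const FoldableExtend Table[] = {
    {{10, 20}, 0, false, 16},  // sxth-like
    {{11, 21}, 0, true, 16},   // uxth-like
    {{12, 22}, 255, true, 8},  // "and #255"-like
};

static bool match(bool IsThumb2, unsigned Opc, uint64_t Imm, int Bits,
                  bool &IsZExt) {
  for (const FoldableExtend &E : Table)
    if (E.Opc[IsThumb2] == Opc && E.ExpectedImm == Imm &&
        E.ExpectedBits == Bits) {
      IsZExt = E.IsZExt;
      return true;
    }
  return false;
}

int main() {
  bool Z;
  assert(match(false, 12, 255, 8, Z) && Z); // ARM zext of i8 via and #255
  assert(!match(true, 12, 255, 8, Z));      // wrong ISA column
}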
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 483802b130..c8637be2bd 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -141,7 +141,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
assert(!AFI->isThumb1OnlyFunction() &&
"This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+ unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -357,7 +358,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
"This emitEpilogue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+ unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 1240169e84..c69d313fd9 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -44,10 +44,16 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
MachineInstr *DefMI = LastMI;
const MCInstrDesc &LastMCID = LastMI->getDesc();
+ const TargetMachine &TM =
+ MI->getParent()->getParent()->getTarget();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+
// Skip over one non-VFP / NEON instruction.
if (!LastMI->isBarrier() &&
// On A9, AGU and NEON/FPU are muxed.
- !(STI.isLikeA9() && (LastMI->mayLoad() || LastMI->mayStore())) &&
+ !(TII.getSubtarget().isLikeA9() &&
+ (LastMI->mayLoad() || LastMI->mayStore())) &&
(LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
MachineBasicBlock::iterator I = LastMI;
if (I != LastMI->getParent()->begin()) {
@@ -58,7 +64,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (TII.isFpMLxInstruction(DefMI->getOpcode()) &&
(TII.canCauseFpMLxStall(MI->getOpcode()) ||
- hasRAWHazard(DefMI, MI, TRI))) {
+ hasRAWHazard(DefMI, MI, TII.getRegisterInfo()))) {
// Try to schedule another instruction for the next 4 cycles.
if (FpMLxStalls == 0)
FpMLxStalls = 4;
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
index 98bfc4cf0c..e1dcec3d1c 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -28,21 +28,14 @@ class MachineInstr;
/// ARM preRA scheduler uses an unspecialized instance of the
/// ScoreboardHazardRecognizer.
class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
- const ARMBaseInstrInfo &TII;
- const ARMBaseRegisterInfo &TRI;
- const ARMSubtarget &STI;
-
MachineInstr *LastMI;
unsigned FpMLxStalls;
public:
ARMHazardRecognizer(const InstrItineraryData *ItinData,
- const ARMBaseInstrInfo &tii,
- const ARMBaseRegisterInfo &tri,
- const ARMSubtarget &sti,
- const ScheduleDAG *DAG) :
- ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
- TRI(tri), STI(sti), LastMI(0) {}
+ const ScheduleDAG *DAG)
+ : ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"),
+ LastMI(0) {}
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void Reset();
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9e1782e119..962368d07f 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -364,7 +364,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
continue;
// Check if the AND mask is an immediate of the form: 000.....1111111100
- unsigned TZ = CountTrailingZeros_32(And_imm);
+ unsigned TZ = countTrailingZeros(And_imm);
if (TZ != 1 && TZ != 2)
// Be conservative here. Shifter operands aren't always free. e.g. On
// Swift, left shifter operand of 1 / 2 for free but others are not.
@@ -402,12 +402,12 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
}
// Now make the transformation.
- Srl = CurDAG->getNode(ISD::SRL, Srl.getDebugLoc(), MVT::i32,
+ Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
Srl.getOperand(0),
CurDAG->getConstant(Srl_imm+TZ, MVT::i32));
- N1 = CurDAG->getNode(ISD::AND, N1.getDebugLoc(), MVT::i32,
+ N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
Srl, CurDAG->getConstant(And_imm, MVT::i32));
- N1 = CurDAG->getNode(ISD::SHL, N1.getDebugLoc(), MVT::i32,
+ N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
N1, CurDAG->getConstant(TZ, MVT::i32));
CurDAG->UpdateNodeOperands(N, N0, N1);
}
@@ -533,7 +533,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
if (N.getOpcode() == ISD::FrameIndex) {
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -557,7 +557,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -703,7 +703,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
} else if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
@@ -724,7 +724,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -901,7 +901,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
@@ -915,7 +915,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -960,7 +960,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
} else if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
@@ -978,7 +978,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
ARM_AM::AddrOpc AddSub = ARM_AM::add;
@@ -1202,7 +1202,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
SDValue &Base, SDValue &OffImm) {
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -1219,7 +1219,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -1267,7 +1267,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
if (N.getOpcode() == ISD::FrameIndex) {
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -1297,7 +1297,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -1326,7 +1326,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -1468,14 +1468,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, AMOpc, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
- return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
+ return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
MVT::i32, MVT::Other, Ops);
} else {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
- return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
+ return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
MVT::i32, MVT::Other, Ops);
}
}
@@ -1524,7 +1524,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
- return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
+ return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
MVT::Other, Ops);
}
@@ -1533,7 +1533,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDLoc dl(V0.getNode());
SDValue RegClass =
CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
@@ -1544,7 +1544,7 @@ SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
/// \brief Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDLoc dl(V0.getNode());
SDValue RegClass =
CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
@@ -1555,7 +1555,7 @@ SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
/// \brief Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDLoc dl(V0.getNode());
SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
@@ -1565,7 +1565,7 @@ SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
/// \brief Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDLoc dl(V0.getNode());
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
@@ -1576,7 +1576,7 @@ SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
/// \brief Form 4 consecutive S registers.
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDLoc dl(V0.getNode());
SDValue RegClass =
CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
@@ -1591,7 +1591,7 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
/// \brief Form 4 consecutive D registers.
SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDLoc dl(V0.getNode());
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
@@ -1605,7 +1605,7 @@ SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
/// \brief Form 4 consecutive Q registers.
SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDLoc dl(V0.getNode());
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
@@ -1689,7 +1689,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *QOpcodes0,
const uint16_t *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue MemAddr, Align;
unsigned AddrOpIdx = isUpdating ? 1 : 2;
@@ -1821,7 +1821,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *QOpcodes0,
const uint16_t *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue MemAddr, Align;
unsigned AddrOpIdx = isUpdating ? 1 : 2;
@@ -1966,7 +1966,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue MemAddr, Align;
unsigned AddrOpIdx = isUpdating ? 1 : 2;
@@ -2084,7 +2084,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
unsigned NumVecs,
const uint16_t *Opcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue MemAddr, Align;
if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
@@ -2166,7 +2166,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
unsigned Opc) {
assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
EVT VT = N->getValueType(0);
unsigned FirstTblReg = IsExt ? 2 : 1;
@@ -2536,7 +2536,7 @@ SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
Ops.push_back(Node->getOperand(0)); // Chain
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
- SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node),
MVT::i32, MVT::i32, MVT::Other,
Ops);
cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
@@ -2544,7 +2544,7 @@ SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
}
SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
if (N->isMachineOpcode())
return NULL; // Already selected.
@@ -2587,7 +2587,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue CPIdx =
CurDAG->getTargetConstantPool(ConstantInt::get(
Type::getInt32Ty(*CurDAG->getContext()), Val),
- TLI.getPointerTy());
+ TLI->getPointerTy());
SDNode *ResNode;
if (Subtarget->isThumb1Only()) {
@@ -2617,7 +2617,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::FrameIndex: {
// Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
if (Subtarget->isThumb1Only()) {
SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
@@ -3121,7 +3121,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_ldrexd: {
SDValue MemAddr = N->getOperand(2);
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue Chain = N->getOperand(0);
bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
@@ -3179,7 +3179,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case Intrinsic::arm_strexd: {
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue Chain = N->getOperand(0);
SDValue Val0 = N->getOperand(2);
SDValue Val1 = N->getOperand(3);
@@ -3383,7 +3383,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VTBL1: {
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
EVT VT = N->getValueType(0);
SmallVector<SDValue, 6> Ops;
@@ -3394,7 +3394,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
}
case ARMISD::VTBL2: {
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
EVT VT = N->getValueType(0);
// Form a REG_SEQUENCE to force register allocation.
@@ -3462,7 +3462,7 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
if (AsmString.find(":H}") == StringRef::npos)
return NULL;
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue Glue = N->getOperand(NumOps-1);
// Glue node will be appended late.
@@ -3567,7 +3567,7 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
if (!Changed)
return NULL;
- SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0],
AsmNodeOperands.size());
New->setNodeId(-1);
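Most of this file's churn is the DebugLoc-to-SDLoc migration: instead of copying a DebugLoc out of a node, an SDLoc is constructed from the node (or SDValue) whose location the new node should inherit, and it additionally carries the IR order used for scheduling. A toy stand-in illustrating the shape of that wrapper (not LLVM's real classes):

#include <cassert>

struct DebugLoc { int Line = 0; };
struct SDNode { DebugLoc DL; int IROrder = 0; };

// SDLoc-like wrapper: captures the source location *and* the IR order of
// the node it is built from.
class SDLoc {
  DebugLoc DL;
  int Order;
public:
  explicit SDLoc(const SDNode *N) : DL(N->DL), Order(N->IROrder) {}
  const DebugLoc &getDebugLoc() const { return DL; }
  int getIROrder() const { return Order; }
};

int main() {
  SDNode N{{42}, 7};
  SDLoc dl(&N);
  assert(dl.getDebugLoc().Line == 42 && dl.getIROrder() == 7);
}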
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 9475f1b5a0..ec0e9c2b54 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -681,6 +681,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -1069,7 +1071,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
+EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector()) return getPointerTy();
return VT.changeVectorElementTypeToInteger();
}
@@ -1233,7 +1235,7 @@ SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
bool isThisReturn, SDValue ThisVal) const {
@@ -1314,7 +1316,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
SDValue StackPtr, SDValue Arg,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
@@ -1325,7 +1327,7 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
false, false, 0);
}
-void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
SDValue Chain, SDValue &Arg,
RegsToPassVector &RegsToPass,
CCValAssign &VA, CCValAssign &NextVA,
@@ -1357,7 +1359,7 @@ SDValue
ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
- DebugLoc &dl = CLI.DL;
+ SDLoc &dl = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
@@ -1406,7 +1408,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!isSibCall)
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ dl);
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
@@ -1481,10 +1484,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// True if this byval aggregate will be split between registers
// and memory.
- if (CCInfo.isFirstByValRegValid()) {
+ unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
+ unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
+
+ if (CurByValIdx < ByValArgsCount) {
+
+ unsigned RegBegin, RegEnd;
+ CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
+
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
unsigned int i, j;
- for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
+ for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
SDValue Const = DAG.getConstant(4*i, MVT::i32);
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
@@ -1493,11 +1503,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
- offset = ARM::R4 - CCInfo.getFirstByValReg();
- CCInfo.clearFirstByValReg();
+
+ // If the parameter size exceeds the register area, the "offset" value
+ // helps us calculate the stack slot for the remaining part properly.
+ offset = RegEnd - RegBegin;
+
+ CCInfo.nextInRegsParam();
}
- if (Flags.getByValSize() - 4*offset > 0) {
+ if (Flags.getByValSize() > 4*offset) {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
@@ -1718,7 +1732,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag);
+ DAG.getIntPtrConstant(0, true), InFlag, dl);
if (!Ins.empty())
InFlag = Chain.getValue(1);
@@ -1740,9 +1754,24 @@ ARMTargetLowering::HandleByVal(
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
- if ((!State->isFirstByValRegValid()) &&
- (!Subtarget->isAAPCS_ABI() || State->getNextStackOffset() == 0) &&
- (ARM::R0 <= reg) && (reg <= ARM::R3)) {
+
+ // When handling parameters in the prologue, we also introduce a stack
+ // offset for byval registers: see CallingConvLower.cpp, CCState::HandleByVal.
+ // This behaviour falls outside the AAPCS rules (5.5 Parameters Passing) for
+ // how NSAA should be evaluated (NSAA means "next stacked argument address").
+ // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs.
+ // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs.
+ unsigned NSAAOffset = State->getNextStackOffset();
+ if (State->getCallOrPrologue() != Call) {
+ for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) {
+ unsigned RB, RE;
+ State->getInRegsParamInfo(i, RB, RE);
+ assert(NSAAOffset >= (RE-RB)*4 &&
+ "Stack offset for byval regs no longer introduced?");
+ NSAAOffset -= (RE-RB)*4;
+ }
+ }
+ if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
if (Subtarget->isAAPCS_ABI() && Align > 4) {
unsigned AlignInRegs = Align / 4;
unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
@@ -1750,22 +1779,45 @@ ARMTargetLowering::HandleByVal(
reg = State->AllocateReg(GPRArgRegs, 4);
}
if (reg != 0) {
- State->setFirstByValReg(reg);
+ unsigned excess = 4 * (ARM::R4 - reg);
+
+ // Special case when NSAA != SP and the parameter size is greater than the
+ // size of all remaining GPR regs. In that case we can't split the parameter;
+ // we must send it to the stack. We also must set NCRN to R4, so we waste all
+ // remaining registers.
+ if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
+ while (State->AllocateReg(GPRArgRegs, 4))
+ ;
+ return;
+ }
+
+ // The first register for the byval parameter is the first register that
+ // wasn't allocated before this method call, so it would be "reg".
+ // If the parameter is small enough to be saved entirely in the range
+ // [reg, r4), the end (one past the last) register is reg + param-size-in-regs;
+ // otherwise the parameter is split between registers and stack, and the
+ // end register is r4 in this case.
+ unsigned ByValRegBegin = reg;
+ unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
+ State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
+ // Note: the first register was already allocated at the beginning of this
+ // function; allocate the remaining registers we need.
+ for (unsigned i = reg+1; i != ByValRegEnd; ++i)
+ State->AllocateReg(GPRArgRegs, 4);
// At a call site, a byval parameter that is split between
// registers and memory needs its size truncated here. In a
// function prologue, such byval parameters are reassembled in
// memory, and are not truncated.
if (State->getCallOrPrologue() == Call) {
- unsigned excess = 4 * (ARM::R4 - reg);
- assert(size >= excess && "expected larger existing stack allocation");
- size -= excess;
+ // Make the remaining size zero when the whole structure
+ // can be stored in registers.
+ if (size < excess)
+ size = 0;
+ else
+ size -= excess;
}
}
}
- // Confiscate any remaining parameter registers to preclude their
- // assignment to subsequent parameters.
- while (State->AllocateReg(GPRArgRegs, 4))
- ;
}
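The register/stack split computed by HandleByVal reduces to: the head of the aggregate occupies [reg, reg + size/4) when it fits below r4, and [reg, r4) otherwise, with the tail going to the stack. A self-contained sketch of that computation, where register numbers 0..3 stand for r0..r3 and 4 stands for r4:

#include <cassert>

static void splitByVal(unsigned Reg, unsigned Size, unsigned &RegBegin,
                       unsigned &RegEnd) {
  unsigned Excess = 4 * (4 - Reg); // bytes that still fit in arg GPRs
  RegBegin = Reg;
  RegEnd = (Size < Excess) ? Reg + Size / 4 : 4; // clamp at r4
}

int main() {
  unsigned B, E;
  splitByVal(/*reg=*/2, /*size=*/4, B, E);  // fits entirely: uses r2 only
  assert(B == 2 && E == 3);
  splitByVal(/*reg=*/2, /*size=*/16, B, E); // split: r2-r3 plus stack tail
  assert(B == 2 && E == 4);
}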
/// MatchingStackOffset - Return true if the given stack call argument is
@@ -1970,7 +2022,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
+ SDLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 16> RVLocs;
@@ -2098,7 +2150,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
Copy = *Copy->use_begin();
if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
return false;
- Chain = Copy->getOperand(0);
+ TCChain = Copy->getOperand(0);
} else {
return false;
}
@@ -2137,7 +2189,7 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
EVT PtrVT = Op.getValueType();
// FIXME there is no actual debug info here
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDValue Res;
if (CP->isMachineConstantPoolEntry())
@@ -2158,7 +2210,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = 0;
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
EVT PtrVT = getPointerTy();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
@@ -2187,7 +2239,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const {
- DebugLoc dl = GA->getDebugLoc();
+ SDLoc dl(GA);
EVT PtrVT = getPointerTy();
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
MachineFunction &MF = DAG.getMachineFunction();
@@ -2230,7 +2282,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
TLSModel::Model model) const {
const GlobalValue *GV = GA->getGlobal();
- DebugLoc dl = GA->getDebugLoc();
+ SDLoc dl(GA);
SDValue Offset;
SDValue Chain = DAG.getEntryNode();
EVT PtrVT = getPointerTy();
@@ -2300,7 +2352,7 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
@@ -2343,7 +2395,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
@@ -2408,7 +2460,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
@@ -2424,7 +2476,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue Val = DAG.getConstant(0, MVT::i32);
return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
@@ -2433,7 +2485,7 @@ ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
SDValue
ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
Op.getOperand(1), DAG.getConstant(0, MVT::i32));
}
@@ -2442,7 +2494,7 @@ SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::arm_thread_pointer: {
@@ -2478,7 +2530,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
case Intrinsic::arm_neon_vmullu: {
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
? ARMISD::VMULLs : ARMISD::VMULLu;
- return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
}
@@ -2487,7 +2539,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
// FIXME: handle "fence singlethread" more efficiently.
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (!Subtarget->hasDataBarrier()) {
// Some ARMv6 cpus can support data barriers with an mcr instruction.
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
@@ -2510,7 +2562,7 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
// Just preserve the chain.
return Op.getOperand(0);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
if (!isRead &&
(!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
@@ -2535,7 +2587,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
@@ -2546,7 +2598,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
SDValue
ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
- DebugLoc dl) const {
+ SDLoc dl) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -2580,13 +2632,17 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
void
ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned InRegsParamRecordIdx,
+ unsigned ArgSize,
unsigned &ArgRegsSize,
unsigned &ArgRegsSaveSize)
const {
unsigned NumGPRs;
- if (CCInfo.isFirstByValRegValid())
- NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
- else {
+ if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+ unsigned RBegin, REnd;
+ CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+ NumGPRs = REnd - RBegin;
+ } else {
unsigned int firstUnalloced;
firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
sizeof(GPRArgRegs) /
@@ -2596,7 +2652,29 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
ArgRegsSize = NumGPRs * 4;
- ArgRegsSaveSize = (ArgRegsSize + Align - 1) & ~(Align - 1);
+
+ // If the parameter is split between the stack and GPRs...
+ if (NumGPRs && Align == 8 &&
+ (ArgRegsSize < ArgSize ||
+ InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
+ // Add padding for the part of the parameter passed in GPRs, so that
+ // its last byte lands at an address of the form K*8 - 1. This is
+ // needed because the remaining (stack) part of the parameter is
+ // stack-aligned, and the "GPRs head" must attach to it without a gap:
+ // Stack:
+ // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
+ // [ [padding] [GPRs head] ] [ Tail passed via stack ....
+ //
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned Padding =
+ ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) -
+ (ArgRegsSize + AFI->getArgRegsSaveSize());
+ ArgRegsSaveSize = ArgRegsSize + Padding;
+ } else
+ // We don't need to extend the regs save size for byval parameters if
+ // they are passed via GPRs only.
+ ArgRegsSaveSize = ArgRegsSize;
}
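
As a standalone restatement, the padding computed above rounds the total saved-register area up to the stack alignment and keeps only the slack. A minimal sketch, assuming an 8-byte stack alignment and a hypothetical helper name (not code from the patch):

#include <cassert>

// Illustrative restatement of the padding arithmetic: round ArgRegsSize
// plus the already-saved area up to Align and return the slack that must
// precede the "GPRs head".
static unsigned byvalPadding(unsigned ArgRegsSize, unsigned AlreadySaved,
                             unsigned Align = 8) {
  unsigned Total = ArgRegsSize + AlreadySaved;
  return ((Total + Align - 1) & ~(Align - 1)) - Total;
}

int main() {
  assert(byvalPadding(12, 4) == 0); // 16 bytes: already 8-byte aligned
  assert(byvalPadding(4, 0) == 4);  // a lone 4-byte head needs 4 bytes of pad
}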
// The remaining GPRs hold either the beginning of variable-argument
@@ -2609,10 +2687,12 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
// Return: The frame index the registers were stored into.
int
ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
- DebugLoc dl, SDValue &Chain,
+ SDLoc dl, SDValue &Chain,
const Value *OrigArg,
+ unsigned InRegsParamRecordIdx,
unsigned OffsetFromOrigArg,
unsigned ArgOffset,
+ unsigned ArgSize,
bool ForceMutable) const {
// Currently, two use cases are possible:
@@ -2629,33 +2709,45 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned firstRegToSaveIndex;
- if (CCInfo.isFirstByValRegValid())
- firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
- else {
+ unsigned firstRegToSaveIndex, lastRegToSaveIndex;
+ unsigned RBegin, REnd;
+ if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+ CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+ firstRegToSaveIndex = RBegin - ARM::R0;
+ lastRegToSaveIndex = REnd - ARM::R0;
+ } else {
firstRegToSaveIndex = CCInfo.getFirstUnallocated
(GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+ lastRegToSaveIndex = 4;
}
unsigned ArgRegsSize, ArgRegsSaveSize;
- computeRegArea(CCInfo, MF, ArgRegsSize, ArgRegsSaveSize);
+ computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
+ ArgRegsSize, ArgRegsSaveSize);
// Store any by-val regs to their spots on the stack so that they may be
// loaded by dereferencing the formal parameter pointer or va_next.
// Note: once the stack area for byval/varargs registers has been
// initialized, it can't be initialized again.
- if (!AFI->getArgRegsSaveSize() && ArgRegsSaveSize) {
+ if (ArgRegsSaveSize) {
+
+ unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
- AFI->setArgRegsSaveSize(ArgRegsSaveSize);
+ if (Padding) {
+ assert(AFI->getStoredByValParamsPadding() == 0 &&
+ "The only parameter may be padded.");
+ AFI->setStoredByValParamsPadding(Padding);
+ }
int FrameIndex = MFI->CreateFixedObject(
ArgRegsSaveSize,
- ArgOffset + ArgRegsSaveSize - ArgRegsSize,
+ Padding + ArgOffset,
false);
SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
SmallVector<SDValue, 4> MemOps;
- for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
+ for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
+ ++firstRegToSaveIndex, ++i) {
const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
RC = &ARM::tGPRRegClass;
@@ -2672,19 +2764,23 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
}
+
+ AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
+
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOps[0], MemOps.size());
return FrameIndex;
} else
// This will point to the next argument passed via stack.
- return MFI->CreateFixedObject(4, ArgOffset, !ForceMutable);
+ return MFI->CreateFixedObject(
+ 4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable);
}
// Set up the stack frame that the va_list pointer will start from.
void
ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
- DebugLoc dl, SDValue &Chain,
+ SDLoc dl, SDValue &Chain,
unsigned ArgOffset,
bool ForceMutable) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -2696,7 +2792,8 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
// If there are no regs to be stored, just point the address past the last
// argument passed via the stack.
int FrameIndex =
- StoreByValRegs(CCInfo, DAG, dl, Chain, 0, 0, ArgOffset, ForceMutable);
+ StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(),
+ 0, ArgOffset, 0, ForceMutable);
AFI->setVarArgsFrameIndex(FrameIndex);
}
@@ -2706,7 +2803,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -2727,6 +2824,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue ArgValue;
Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
+
+ // ArgRegsSaveSize starts at zero; it is increased each time a byval
+ // parameter is encountered, and again for varargs functions.
+ AFI->setArgRegsSaveSize(0);
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
@@ -2824,15 +2927,21 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
+ unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
int FrameIndex = StoreByValRegs(
- CCInfo, DAG, dl, Chain, CurOrigArg,
- Ins[VA.getValNo()].PartOffset,
- VA.getLocMemOffset(),
- true /*force mutable frames*/);
+ CCInfo, DAG, dl, Chain, CurOrigArg,
+ CurByValIndex,
+ Ins[VA.getValNo()].PartOffset,
+ VA.getLocMemOffset(),
+ Flags.getByValSize(),
+ true /*force mutable frames*/);
InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
+ CCInfo.nextInRegsParam();
} else {
+ unsigned FIOffset = VA.getLocMemOffset() +
+ AFI->getStoredByValParamsPadding();
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
- VA.getLocMemOffset(), true);
+ FIOffset, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -2874,7 +2983,7 @@ static bool isFloatingPointZero(SDValue Op) {
SDValue
ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG,
- DebugLoc dl) const {
+ SDLoc dl) const {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
unsigned C = RHSC->getZExtValue();
if (!isLegalICmpImmediate(C)) {
@@ -2932,7 +3041,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue
ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
- DebugLoc dl) const {
+ SDLoc dl) const {
SDValue Cmp;
if (!isFloatingPointZero(RHS))
Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
@@ -2946,7 +3055,7 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
SDValue
ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
unsigned Opc = Cmp.getOpcode();
- DebugLoc DL = Cmp.getDebugLoc();
+ SDLoc DL(Cmp);
if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
@@ -2966,7 +3075,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond = Op.getOperand(0);
SDValue SelectTrue = Op.getOperand(1);
SDValue SelectFalse = Op.getOperand(2);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
// Convert:
//
@@ -3021,7 +3130,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue TrueVal = Op.getOperand(2);
SDValue FalseVal = Op.getOperand(3);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (LHS.getValueType() == MVT::i32) {
SDValue ARMcc;
@@ -3076,7 +3185,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
return DAG.getConstant(0, MVT::i32);
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
- return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ return DAG.getLoad(MVT::i32, SDLoc(Op),
Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
Ld->isInvariant(), Ld->getAlignment());
@@ -3094,7 +3203,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
SDValue Ptr = Ld->getBasePtr();
- RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op),
Ld->getChain(), Ptr,
Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
@@ -3102,9 +3211,9 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
EVT PtrType = Ptr.getValueType();
unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
- SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
+ SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op),
PtrType, Ptr, DAG.getConstant(4, PtrType));
- RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op),
Ld->getChain(), NewPtr,
Ld->getPointerInfo().getWithOffset(4),
Ld->isVolatile(), Ld->isNonTemporal(),
@@ -3124,7 +3233,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
bool LHSSeenZero = false;
bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
@@ -3174,7 +3283,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (LHS.getValueType() == MVT::i32) {
SDValue ARMcc;
@@ -3215,7 +3324,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Table = Op.getOperand(1);
SDValue Index = Op.getOperand(2);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT PTy = getPointerTy();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
@@ -3251,7 +3360,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (Op.getValueType().getVectorElementType() == MVT::i32) {
if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
@@ -3273,7 +3382,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned Opc;
switch (Op.getOpcode()) {
@@ -3291,7 +3400,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
if (VT.getVectorElementType() == MVT::f32)
@@ -3327,7 +3436,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
if (VT.isVector())
return LowerVectorINT_TO_FP(Op, DAG);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned Opc;
switch (Op.getOpcode()) {
@@ -3348,7 +3457,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// Implement fcopysign with a fabs and a conditional fneg.
SDValue Tmp0 = Op.getOperand(0);
SDValue Tmp1 = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
@@ -3432,7 +3541,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
MFI->setReturnAddressIsTaken(true);
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
@@ -3452,7 +3561,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
? ARM::R7 : ARM::R11;
@@ -3481,7 +3590,7 @@ static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits())
return SDValue();
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits();
unsigned NumElts = SrcVT.getVectorNumElements();
@@ -3512,7 +3621,7 @@ static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
/// vectors), since the legalizer won't know what to do with that.
static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue Op = N->getOperand(0);
// This function is only supposed to be called for i64 types, either as the
@@ -3549,7 +3658,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
/// not support i64 elements, so sometimes the zero vectors will need to be
/// explicitly constructed. Regardless, use a canonical VMOV to create the
/// zero vector.
-static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) {
assert(VT.isVector() && "Expected a vector type");
// The canonical modified immediate encoding of a zero vector is....0!
SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
@@ -3565,7 +3674,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
@@ -3601,7 +3710,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
@@ -3634,7 +3743,7 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
// The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
// so that the shift + and get folded into a bitfield extract.
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
DAG.getConstant(Intrinsic::arm_get_fpscr,
MVT::i32));
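
That rounding-mode mapping is easy to sanity-check in isolation. A minimal sketch of the same formula, assuming the FPSCR value sits in a plain unsigned (the function name is hypothetical):

#include <cassert>

// FPSCR[23:22] holds the rounding mode; adding 1 << 22 increments that
// two-bit field, and the final mask discards any carry into bit 24.
static unsigned fltRounds(unsigned FPSCR) {
  return ((FPSCR + (1 << 22)) >> 22) & 3;
}

int main() {
  assert(fltRounds(0u << 22) == 1); // round-to-nearest  -> 1
  assert(fltRounds(1u << 22) == 2); // round-toward-+inf -> 2
  assert(fltRounds(2u << 22) == 3); // round-toward--inf -> 3
  assert(fltRounds(3u << 22) == 0); // round-toward-zero -> 0
}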
@@ -3649,7 +3758,7 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
if (!ST->hasV6T2Ops())
return SDValue();
@@ -3673,7 +3782,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
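
In intrinsic form, the per-byte counting scheme amounts to the sketch below for the v4i16 case. Note this uses the pairwise-add intrinsic for brevity, whereas the lowering itself builds the VUZP-based sequence the comments describe:

#include <arm_neon.h>

// Count bits per byte with VCNT, then fold adjacent byte counts into
// 16-bit lanes (an equivalent of the vuzp+vadd combination above).
static uint16x4_t ctpop_v4i16(uint16x4_t v) {
  uint8x8_t PerByte = vcnt_u8(vreinterpret_u8_u16(v));
  return vpaddl_u8(PerByte);
}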
@@ -3695,7 +3804,7 @@ static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
/// v4i16:Extracted = [k0 k1 k2 k3 ]
static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
if (VT.is64BitVector()) {
@@ -3730,7 +3839,7 @@ static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
///
static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
@@ -3769,7 +3878,7 @@ static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
if (!VT.isVector())
return SDValue();
@@ -3804,7 +3913,7 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
// We can get here for a node like i32 = ISD::SHL i32, i64
if (VT != MVT::i64)
@@ -3850,7 +3959,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
SDValue CC = Op.getOperand(2);
EVT VT = Op.getValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (Op.getOperand(1).getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
@@ -4119,7 +4228,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
APFloat FPVal = CFP->getValueAPF();
int ImmVal = ARM_AM::getFP32Imm(FPVal);
if (ImmVal != -1) {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
NewVal);
@@ -4133,7 +4242,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
VMOVModImm);
if (NewVal != SDValue()) {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
NewVal);
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
@@ -4146,7 +4255,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
VMVNModImm);
if (NewVal != SDValue()) {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
@@ -4406,7 +4515,7 @@ static bool isReverseMask(ArrayRef<int> M, EVT VT) {
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
- const ARMSubtarget *ST, DebugLoc dl) {
+ const ARMSubtarget *ST, SDLoc dl) {
uint64_t Val;
if (!isa<ConstantSDNode>(N))
return SDValue();
@@ -4427,7 +4536,7 @@ static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const {
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT VT = Op.getValueType();
APInt SplatBits, SplatUndef;
@@ -4617,7 +4726,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// shuffle in combination with VEXTs.
SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
@@ -4806,7 +4915,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
- DebugLoc dl) {
+ SDLoc dl) {
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
@@ -4886,7 +4995,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
// Check to see if we can use the VTBL instruction.
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SmallVector<SDValue, 8> VTBLMask;
for (ArrayRef<int>::iterator
@@ -4905,7 +5014,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
SelectionDAG &DAG) {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SDValue OpLHS = Op.getOperand(0);
EVT VT = OpLHS.getValueType();
@@ -4923,7 +5032,7 @@ static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT VT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
@@ -5087,7 +5196,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
SDValue Vec = Op.getOperand(0);
if (Op.getValueType() == MVT::i32 &&
Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
}
@@ -5099,7 +5208,7 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
// two 64-bit vectors are concatenated to a 128-bit vector.
assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
"unexpected CONCAT_VECTORS");
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue Val = DAG.getUNDEF(MVT::v2f64);
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -5188,6 +5297,23 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
return false;
}
+static EVT getExtensionTo64Bits(const EVT &OrigVT) {
+ if (OrigVT.getSizeInBits() >= 64)
+ return OrigVT;
+
+ assert(OrigVT.isSimple() && "Expecting a simple value type");
+
+ MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
+ switch (OrigSimpleTy) {
+ default: llvm_unreachable("Unexpected Vector Type");
+ case MVT::v2i8:
+ case MVT::v2i16:
+ return MVT::v2i32;
+ case MVT::v4i8:
+ return MVT::v4i16;
+ }
+}
+
/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
/// We insert the required extension here to get the vector to fill a D register.
@@ -5203,19 +5329,9 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
return N;
// Must extend size to at least 64 bits to be used as an operand for VMULL.
- MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy;
- EVT NewVT;
- switch (OrigSimpleTy) {
- default: llvm_unreachable("Unexpected Orig Vector Type");
- case MVT::v2i8:
- case MVT::v2i16:
- NewVT = MVT::v2i32;
- break;
- case MVT::v4i8:
- NewVT = MVT::v4i16;
- break;
- }
- return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N);
+ EVT NewVT = getExtensionTo64Bits(OrigTy);
+
+ return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
}
/// SkipLoadExtensionForVMULL - return a load of the original vector size that
@@ -5224,22 +5340,22 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
/// reach a total size of 64 bits. We have to add the extension separately
/// because ARM does not have a sign/zero extending load for vectors.
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
- SDValue NonExtendingLoad =
- DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(),
+ EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
+
+ // The load already has the right type.
+ if (ExtendedTy == LD->getMemoryVT())
+ return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
LD->isNonTemporal(), LD->isInvariant(),
LD->getAlignment());
- unsigned ExtOp = 0;
- switch (LD->getExtensionType()) {
- default: llvm_unreachable("Unexpected LoadExtType");
- case ISD::EXTLOAD:
- case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break;
- case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break;
- }
- MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy;
- MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy;
- return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG,
- MemType, ExtType, ExtOp);
+
+ // We need to create a zextload/sextload. We cannot just create a load
+ // followed by a zext/sext node because LowerMUL is also run during normal
+ // operation legalization where we can't create illegal types.
+ return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
+ LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
+ LD->getMemoryVT(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
}
/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
@@ -5265,7 +5381,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32,
BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
}
// Construct a new BUILD_VECTOR with elements truncated to half the size.
@@ -5282,7 +5398,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
}
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
}
@@ -5354,7 +5470,7 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
}
// Legalize to a VMULL instruction.
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SDValue Op0;
SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
if (!isMLA) {
@@ -5384,7 +5500,7 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
}
static SDValue
-LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
+LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
// Convert to float
// float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
// float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
@@ -5413,7 +5529,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
}
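
Written out with NEON intrinsics in C, the division-by-reciprocal idea those comments begin to describe looks roughly like this. The function name and the single Newton-Raphson refinement step are illustrative assumptions; the actual DAG sequence differs in detail:

#include <arm_neon.h>

// Approximate x / y per lane: widen to i32, convert to float, multiply by
// a refined reciprocal estimate of y, then convert back (truncating).
static int32x4_t sdiv4i16_estimate(int16x4_t x, int16x4_t y) {
  float32x4_t xf = vcvtq_f32_s32(vmovl_s16(x));
  float32x4_t yf = vcvtq_f32_s32(vmovl_s16(y));
  float32x4_t r  = vrecpeq_f32(yf);        // initial 1/y estimate
  r = vmulq_f32(vrecpsq_f32(yf, r), r);    // one Newton-Raphson step
  return vcvtq_s32_f32(vmulq_f32(xf, r));  // x * (1/y)
}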
static SDValue
-LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
+LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
SDValue N2;
// Convert to float.
// float4 yf = vcvt_f32_s32(vmovl_s16(y));
@@ -5454,7 +5570,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
"unexpected type for custom-lowering ISD::SDIV");
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2, N3;
@@ -5489,7 +5605,7 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
"unexpected type for custom-lowering ISD::UDIV");
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2, N3;
@@ -5573,9 +5689,9 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
}
if (!ExtraOp)
- return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
Op.getOperand(1));
- return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
Op.getOperand(1), Op.getOperand(2));
}
@@ -5589,11 +5705,10 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
-
static void
ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
SelectionDAG &DAG, unsigned NewOp) {
- DebugLoc dl = Node->getDebugLoc();
+ SDLoc dl(Node);
assert (Node->getValueType(0) == MVT::i64 &&
"Only know how to expand i64 atomics");
@@ -5623,6 +5738,44 @@ ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
Results.push_back(Result.getValue(2));
}
+static void ReplaceREADCYCLECOUNTER(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ SDLoc DL(N);
+ SDValue Cycles32, OutChain;
+
+ if (Subtarget->hasPerfMon()) {
+ // Under the Performance Monitors extension, the cycle count is:
+ // mrc p15, #0, <Rt>, c9, c13, #0
+ SDValue Ops[] = { N->getOperand(0), // Chain
+ DAG.getConstant(Intrinsic::arm_mrc, MVT::i32),
+ DAG.getConstant(15, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(9, MVT::i32),
+ DAG.getConstant(13, MVT::i32),
+ DAG.getConstant(0, MVT::i32)
+ };
+
+ Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
+ DAG.getVTList(MVT::i32, MVT::Other), &Ops[0],
+ array_lengthof(Ops));
+ OutChain = Cycles32.getValue(1);
+ } else {
+ // Intrinsic is defined to return 0 on unsupported platforms. Technically
+ // there are older ARM CPUs that have implementation-specific ways of
+ // obtaining this information (FIXME!).
+ Cycles32 = DAG.getConstant(0, MVT::i32);
+ OutChain = DAG.getEntryNode();
+ }
+
+ SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
+ Cycles32, DAG.getConstant(0, MVT::i32));
+ Results.push_back(Cycles64);
+ Results.push_back(OutChain);
+}
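
The MRC operands built above encode the classic cycle-counter read. A hand-written equivalent, assuming user-mode access to PMCCNTR has been enabled (which this patch does not arrange):

#include <stdint.h>

// Read the 32-bit cycle counter (PMCCNTR): coprocessor 15, opc1 0,
// CRn c9, CRm c13, opc2 0 -- the same operands as the intrinsic call.
static inline uint64_t read_cycle_counter(void) {
  uint32_t cycles;
  asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(cycles));
  return (uint64_t)cycles; // zero high half, matching the BUILD_PAIR above
}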
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
@@ -5700,6 +5853,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
break;
+ case ISD::READCYCLECOUNTER:
+ ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
+ return;
case ISD::ATOMIC_LOAD_ADD:
ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
return;
@@ -7634,13 +7790,13 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
// Slct is now know to be the desired identity constant when CC is true.
SDValue TrueVal = OtherOp;
- SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+ SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
OtherOp, NonConstantVal);
// Unless SwapSelectOps says CC should be false.
if (SwapSelectOps)
std::swap(TrueVal, FalseVal);
- return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
CCOp, TrueVal, FalseVal);
}
@@ -7747,9 +7903,9 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
llvm_unreachable("Invalid vector element type for padd optimization.");
}
- SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
widenType, &Ops[0], Ops.size());
- return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, tmp);
}
static SDValue findMUL_LOHI(SDValue V) {
@@ -7874,7 +8030,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
Ops.push_back(*LowAdd);
Ops.push_back(*HiAdd);
- SDValue MLALNode = DAG.getNode(FinalOpc, AddcNode->getDebugLoc(),
+ SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode),
DAG.getVTList(MVT::i32, MVT::i32),
&Ops[0], Ops.size());
@@ -7982,7 +8138,7 @@ static SDValue PerformVMULCombine(SDNode *N,
}
EVT VT = N->getValueType(0);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
return DAG.getNode(Opcode, DL, VT,
@@ -8012,11 +8168,11 @@ static SDValue PerformMULCombine(SDNode *N,
return SDValue();
int64_t MulAmt = C->getSExtValue();
- unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+ unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);
ShiftAmt = ShiftAmt & (32 - 1);
SDValue V = N->getOperand(0);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
SDValue Res;
MulAmt >>= ShiftAmt;
@@ -8080,7 +8236,7 @@ static SDValue PerformANDCombine(SDNode *N,
// Attempt to use immediate-form VBIC
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
@@ -8123,7 +8279,7 @@ static SDValue PerformORCombine(SDNode *N,
const ARMSubtarget *Subtarget) {
// Attempt to use immediate-form VORR
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
@@ -8198,7 +8354,7 @@ static SDValue PerformORCombine(SDNode *N,
if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
return SDValue();
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
// 1) or (and A, mask), val => ARMbfi A, val, mask
// iff (val & mask) == val
//
@@ -8233,7 +8389,7 @@ static SDValue PerformORCombine(SDNode *N,
return SDValue();
if (ARM::isBitFieldInvertedMask(Mask)) {
- Val >>= CountTrailingZeros_32(~Mask);
+ Val >>= countTrailingZeros(~Mask);
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
DAG.getConstant(Val, MVT::i32),
@@ -8260,7 +8416,7 @@ static SDValue PerformORCombine(SDNode *N,
(Mask == 0xffff || Mask == 0xffff0000))
return SDValue();
// 2a
- unsigned amt = CountTrailingZeros_32(Mask2);
+ unsigned amt = countTrailingZeros(Mask2);
Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
DAG.getConstant(amt, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
@@ -8276,7 +8432,7 @@ static SDValue PerformORCombine(SDNode *N,
(Mask2 == 0xffff || Mask2 == 0xffff0000))
return SDValue();
// 2b
- unsigned lsb = CountTrailingZeros_32(Mask);
+ unsigned lsb = countTrailingZeros(Mask);
Res = DAG.getNode(ISD::SRL, DL, VT, N00,
DAG.getConstant(lsb, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
@@ -8294,7 +8450,7 @@ static SDValue PerformORCombine(SDNode *N,
// where lsb(mask) == #shamt and masked bits of B are known zero.
SDValue ShAmt = N00.getOperand(1);
unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
- unsigned LSB = CountTrailingZeros_32(Mask);
+ unsigned LSB = countTrailingZeros(Mask);
if (ShAmtC != LSB)
return SDValue();
@@ -8337,12 +8493,12 @@ static SDValue PerformBFICombine(SDNode *N,
if (!N11C)
return SDValue();
unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
- unsigned LSB = CountTrailingZeros_32(~InvMask);
- unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
+ unsigned LSB = countTrailingZeros(~InvMask);
+ unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
unsigned Mask = (1 << Width)-1;
unsigned Mask2 = N11C->getZExtValue();
if ((Mask & (~Mask2)) == 0)
- return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
+ return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
N->getOperand(0), N1.getOperand(0),
N->getOperand(2));
}
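
Tracing the field arithmetic above with concrete numbers makes the fold easier to follow; the mask values here are hypothetical, chosen only to exercise the computation:

#include <cassert>

int main() {
  unsigned InvMask = 0xFFFF00FF;                          // bits BFI preserves
  unsigned LSB   = __builtin_ctz(~InvMask);               // 8: field start
  unsigned Width = (32 - __builtin_clz(~InvMask)) - LSB;  // 8: field width
  unsigned Mask  = (1u << Width) - 1;                     // 0xFF
  unsigned Mask2 = 0xFF;                                  // the AND's constant
  assert(LSB == 8 && Width == 8 && Mask == 0xFF);
  assert((Mask & ~Mask2) == 0); // so the intermediate AND can be dropped
}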
@@ -8368,7 +8524,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
LoadSDNode *LD = cast<LoadSDNode>(InNode);
SelectionDAG &DAG = DCI.DAG;
- DebugLoc DL = LD->getDebugLoc();
+ SDLoc DL(LD);
SDValue BasePtr = LD->getBasePtr();
SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
LD->getPointerInfo(), LD->isVolatile(),
@@ -8405,7 +8561,7 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
if (Op0.getOpcode() == ARMISD::VMOVRRD &&
Op0.getNode() == Op1.getNode() &&
Op0.getResNo() == 0 && Op1.getResNo() == 1)
- return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, SDLoc(N),
N->getValueType(0), Op0.getOperand(0));
return SDValue();
}
@@ -8447,7 +8603,7 @@ static SDValue PerformSTORECombine(SDNode *N,
NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
- DebugLoc DL = St->getDebugLoc();
+ SDLoc DL(St);
SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio;
@@ -8508,7 +8664,7 @@ static SDValue PerformSTORECombine(SDNode *N,
if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
StVal.getNode()->hasOneUse()) {
SelectionDAG &DAG = DCI.DAG;
- DebugLoc DL = St->getDebugLoc();
+ SDLoc DL(St);
SDValue BasePtr = St->getBasePtr();
SDValue NewST1 = DAG.getStore(St->getChain(), DL,
StVal.getNode()->getOperand(0), BasePtr,
@@ -8530,14 +8686,14 @@ static SDValue PerformSTORECombine(SDNode *N,
// Bitcast an i64 store extracted from a vector to f64.
// Otherwise, the i64 value will be legalized to a pair of i32 values.
SelectionDAG &DAG = DCI.DAG;
- DebugLoc dl = StVal.getDebugLoc();
+ SDLoc dl(StVal);
SDValue IntVec = StVal.getOperand(0);
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
IntVec.getValueType().getVectorNumElements());
SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
Vec, StVal.getOperand(1));
- dl = N->getDebugLoc();
+ dl = SDLoc(N);
SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
// Make the DAGCombiner fold the bitcasts.
DCI.AddToWorklist(Vec.getNode());
@@ -8583,7 +8739,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N,
EVT VT = N->getValueType(0);
if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
return SDValue();
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SmallVector<SDValue, 8> Ops;
unsigned NumElts = VT.getVectorNumElements();
for (unsigned i = 0; i < NumElts; ++i) {
@@ -8610,7 +8766,7 @@ static SDValue PerformInsertEltCombine(SDNode *N,
return SDValue();
SelectionDAG &DAG = DCI.DAG;
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
VT.getVectorNumElements());
SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
@@ -8656,7 +8812,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
!TLI.isTypeLegal(Concat1Op1.getValueType()))
return SDValue();
- SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
Op0.getOperand(0), Op1.getOperand(0));
// Translate the shuffle mask.
SmallVector<int, 16> NewMask;
@@ -8672,7 +8828,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
NewElt = HalfElts + MaskElt - NumElts;
NewMask.push_back(NewElt);
}
- return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
DAG.getUNDEF(VT), NewMask.data());
}
@@ -8789,7 +8945,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
Ops.push_back(N->getOperand(i));
}
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
- SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
Ops.data(), Ops.size(),
MemInt->getMemoryVT(),
MemInt->getMemOperand());
@@ -8863,7 +9019,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
- SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
+ SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
Ops, 2, VLDMemInt->getMemoryVT(),
VLDMemInt->getMemOperand());
@@ -8918,7 +9074,7 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
if (EltSize > VT.getVectorElementType().getSizeInBits())
return SDValue();
- return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
+ return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
}
// isConstVecPow2 - Return true if each vector element is a power of 2, all
@@ -8977,7 +9133,7 @@ static SDValue PerformVCVTCombine(SDNode *N,
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
Intrinsic::arm_neon_vcvtfp2fxu;
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
N->getValueType(0),
DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
DAG.getConstant(Log2_64(C), MVT::i32));
@@ -9013,7 +9169,7 @@ static SDValue PerformVDIVCombine(SDNode *N,
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
Intrinsic::arm_neon_vcvtfxu2fp;
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
Op.getValueType(),
DAG.getConstant(IntrinsicOpcode, MVT::i32),
Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32));
@@ -9197,7 +9353,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
VShiftOpc = ARMISD::VQRSHRNsu; break;
}
- return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+ return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
}
@@ -9214,7 +9370,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
}
- return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+ return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2),
DAG.getConstant(Cnt, MVT::i32));
}
@@ -9245,7 +9401,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
DAG.MaskedValueIsZero(N0.getOperand(0),
APInt::getHighBitsSet(32, 16)))
- return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1);
+ return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
}
}
@@ -9262,7 +9418,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SHL:
if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
- return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
+ return DAG.getNode(ARMISD::VSHL, SDLoc(N), VT, N->getOperand(0),
DAG.getConstant(Cnt, MVT::i32));
break;
@@ -9271,7 +9427,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
ARMISD::VSHRs : ARMISD::VSHRu);
- return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
+ return DAG.getNode(VShiftOpc, SDLoc(N), VT, N->getOperand(0),
DAG.getConstant(Cnt, MVT::i32));
}
}
@@ -9311,7 +9467,7 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
Opc = ARMISD::VGETLANEu;
break;
}
- return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
+ return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
}
}
@@ -9400,7 +9556,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
if (!Opcode)
return SDValue();
- return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
+ return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
}
/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
@@ -9412,7 +9568,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
return SDValue();
EVT VT = N->getValueType(0);
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue LHS = Cmp.getOperand(0);
SDValue RHS = Cmp.getOperand(1);
SDValue FalseVal = N->getOperand(0);
@@ -10358,17 +10514,15 @@ ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
bool ARM::isBitFieldInvertedMask(unsigned v) {
if (v == 0xffffffff)
- return 0;
+ return false;
+
// There can be 1's on either or both "outsides"; all the "inside"
// bits must be 0's.
- unsigned int lsb = 0, msb = 31;
- while (v & (1 << msb)) --msb;
- while (v & (1 << lsb)) ++lsb;
- for (unsigned int i = lsb; i <= msb; ++i) {
- if (v & (1 << i))
- return 0;
- }
- return 1;
+ unsigned TO = CountTrailingOnes_32(v);
+ unsigned LO = CountLeadingOnes_32(v);
+ v = (v >> TO) << TO;
+ v = (v << LO) >> LO;
+ return v == 0;
}
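
A few concrete checks of the rewritten predicate, restated with compiler builtins (the values are illustrative):

#include <cassert>

// Same strip-the-outside-ones logic: a qualifying mask has 1's only in
// contiguous runs touching bit 0 and/or bit 31, with 0's in between.
static bool invertedMask(unsigned v) {
  if (v == 0xffffffff) return false;
  unsigned TO = __builtin_ctz(~v); // length of the trailing run of 1's
  unsigned LO = __builtin_clz(~v); // length of the leading run of 1's
  v = (v >> TO) << TO;
  v = (v << LO) >> LO;
  return v == 0;
}

int main() {
  assert(invertedMask(0xFF0000FF));  // ones at both ends
  assert(invertedMask(0x0000FFFF));  // ones at one end only
  assert(!invertedMask(0x00FF0000)); // ones strictly inside: rejected
}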
/// isFPImmLegal - Returns true if the target can instruction select the
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 46b8438676..2b65019df8 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -270,7 +270,7 @@ namespace llvm {
}
/// getSetCCResultType - Return the value type to use for ISD::SETCC.
- virtual EVT getSetCCResultType(EVT VT) const;
+ virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
virtual MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
@@ -412,7 +412,7 @@ namespace llvm {
void addQRTypeForNEON(MVT VT);
typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
- void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+ void PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
SDValue Chain, SDValue &Arg,
RegsToPassVector &RegsToPass,
CCValAssign &VA, CCValAssign &NextVA,
@@ -421,12 +421,12 @@ namespace llvm {
ISD::ArgFlagsTy Flags) const;
SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
- DebugLoc dl) const;
+ SDLoc dl) const;
CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
bool isVarArg) const;
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const;
SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
@@ -463,7 +463,7 @@ namespace llvm {
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
bool isThisReturn, SDValue ThisVal) const;
@@ -471,22 +471,26 @@ namespace llvm {
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
- DebugLoc dl, SDValue &Chain,
+ SDLoc dl, SDValue &Chain,
const Value *OrigArg,
+ unsigned InRegsParamRecordIdx,
unsigned OffsetFromOrigArg,
unsigned ArgOffset,
+ unsigned ArgSize,
bool ForceMutable) const;
void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
- DebugLoc dl, SDValue &Chain,
+ SDLoc dl, SDValue &Chain,
unsigned ArgOffset,
bool ForceMutable = false) const;
void computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned InRegsParamRecordIdx,
+ unsigned ArgSize,
unsigned &ArgRegsSize,
unsigned &ArgRegsSaveSize) const;
@@ -520,16 +524,16 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const;
+ SDLoc dl, SelectionDAG &DAG) const;
virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;
virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS,
- SelectionDAG &DAG, DebugLoc dl) const;
+ SelectionDAG &DAG, SDLoc dl) const;
SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 67a6820932..bd9a212928 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1389,7 +1389,6 @@ class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
let Inst{15-12} = Dd{3-0};
let Inst{7-0} = addr{7-0}; // imm8
- // TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-24} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-9} = 0b101;
@@ -1415,7 +1414,6 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
let Inst{15-12} = Sd{4-1};
let Inst{7-0} = addr{7-0}; // imm8
- // TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-24} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-9} = 0b101;
@@ -1437,6 +1435,28 @@ class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
}
// Load / store multiple
+
+// Unknown precision
+class AXXI4<dag oops, dag iops, IndexMode im,
+ string asm, string cstr, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode4, 4, im,
+ VFPLdStFrm, NoItinerary, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = 0;
+ let Inst{15-12} = regs{11-8};
+ let Inst{7-1} = regs{7-1};
+
+ let Inst{27-25} = 0b110;
+ let Inst{11-8} = 0b1011;
+ let Inst{0} = 1;
+}
+
+// Double precision
class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: VFPXI<oops, iops, AddrMode4, 4, im,
@@ -1449,14 +1469,15 @@ class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
let Inst{19-16} = Rn;
let Inst{22} = regs{12};
let Inst{15-12} = regs{11-8};
- let Inst{7-0} = regs{7-0};
+ let Inst{7-1} = regs{7-1};
- // TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
let Inst{11-9} = 0b101;
let Inst{8} = 1; // Double precision
+ let Inst{0} = 0;
}
+// Single precision
class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: VFPXI<oops, iops, AddrMode4, 4, im,
@@ -1471,7 +1492,6 @@ class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
let Inst{15-12} = regs{12-9};
let Inst{7-0} = regs{7-0};
- // TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
let Inst{11-9} = 0b101;
let Inst{8} = 0; // Single precision
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 80f0ec7437..8062111afa 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -29,7 +29,7 @@
using namespace llvm;
ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(*this, STI) {
+ : ARMBaseInstrInfo(STI), RI(STI) {
}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 1bd174e341..da815d563d 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -275,8 +275,8 @@ def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">;
def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">;
-def IsLE : Predicate<"TLI.isLittleEndian()">;
-def IsBE : Predicate<"TLI.isBigEndian()">;
+def IsLE : Predicate<"TLI->isLittleEndian()">;
+def IsBE : Predicate<"TLI->isBigEndian()">;
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -1327,7 +1327,7 @@ class AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode>
: AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot),
IIC_iEXTr, opc, "\t$Rd, $Rm$rot",
[(set GPRnopc:$Rd, (opnode (rotr GPRnopc:$Rm, rot_imm:$rot)))]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>, Sched<[WriteALUsi]> {
bits<4> Rd;
bits<4> Rm;
bits<2> rot;
@@ -1340,11 +1340,11 @@ class AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode>
class AI_ext_rrot_np<bits<8> opcod, string opc>
: AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot),
IIC_iEXTr, opc, "\t$Rd, $Rm$rot", []>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>, Sched<[WriteALUsi]> {
bits<2> rot;
let Inst{19-16} = 0b1111;
let Inst{11-10} = rot;
-}
+ }
/// AI_exta_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
@@ -1353,7 +1353,7 @@ class AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode>
IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot",
[(set GPRnopc:$Rd, (opnode GPR:$Rn,
(rotr GPRnopc:$Rm, rot_imm:$rot)))]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>, Sched<[WriteALUsr]> {
bits<4> Rd;
bits<4> Rm;
bits<4> Rn;
@@ -1368,7 +1368,7 @@ class AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode>
class AI_exta_rrot_np<bits<8> opcod, string opc>
: AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot),
IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot", []>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>, Sched<[WriteALUsr]> {
bits<4> Rn;
bits<2> rot;
let Inst{19-16} = Rn;
@@ -1780,7 +1780,8 @@ multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload,
!strconcat(opc, "\t$addr"),
- [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]> {
+ [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]>,
+ Sched<[WritePreLd]> {
bits<4> Rt;
bits<17> addr;
let Inst{31-26} = 0b111101;
@@ -1796,7 +1797,8 @@ multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload,
!strconcat(opc, "\t$shift"),
- [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]> {
+ [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]>,
+ Sched<[WritePreLd]> {
bits<17> shift;
let Inst{31-26} = 0b111101;
let Inst{25} = 1; // 1 for register form
@@ -1863,7 +1865,8 @@ def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
let isNotDuplicable = 1 in {
def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
4, IIC_iALUr,
- [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
+ [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>,
+ Sched<[WriteALU, ReadALU]>;
let AddedComplexity = 10 in {
def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
@@ -1923,11 +1926,11 @@ def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
let hasSideEffects = 1 in {
def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
- 4, IIC_iALUi, []>;
+ 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>;
def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
- 4, IIC_iALUi, []>;
+ 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>;
}
//===----------------------------------------------------------------------===//
@@ -1938,14 +1941,14 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
// ARMV4T and above
def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
"bx", "\tlr", [(ARMretflag)]>,
- Requires<[IsARM, HasV4T]> {
+ Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> {
let Inst{27-0} = 0b0001001011111111111100011110;
}
// ARMV4 only
def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
"mov", "\tpc, lr", [(ARMretflag)]>,
- Requires<[IsARM, NoV4T]> {
+ Requires<[IsARM, NoV4T]>, Sched<[WriteBr]> {
let Inst{27-0} = 0b0001101000001111000000001110;
}
}
@@ -1955,7 +1958,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
// ARMV4T and above
def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
[(brind GPR:$dst)]>,
- Requires<[IsARM, HasV4T]> {
+ Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> {
bits<4> dst;
let Inst{31-4} = 0b1110000100101111111111110001;
let Inst{3-0} = dst;
@@ -1963,7 +1966,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BX_pred : AI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br,
"bx", "\t$dst", [/* pattern left blank */]>,
- Requires<[IsARM, HasV4T]> {
+ Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> {
bits<4> dst;
let Inst{27-4} = 0b000100101111111111110001;
let Inst{3-0} = dst;
@@ -1980,7 +1983,7 @@ let isCall = 1,
def BL : ABXI<0b1011, (outs), (ins bl_target:$func),
IIC_Br, "bl\t$func",
[(ARMcall tglobaladdr:$func)]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>, Sched<[WriteBrL]> {
let Inst{31-28} = 0b1110;
bits<24> func;
let Inst{23-0} = func;
@@ -1990,7 +1993,7 @@ let isCall = 1,
def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func),
IIC_Br, "bl", "\t$func",
[(ARMcall_pred tglobaladdr:$func)]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>, Sched<[WriteBrL]> {
bits<24> func;
let Inst{23-0} = func;
let DecoderMethod = "DecodeBranchImmInstruction";
@@ -2000,7 +2003,7 @@ let isCall = 1,
def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm,
IIC_Br, "blx\t$func",
[(ARMcall GPR:$func)]>,
- Requires<[IsARM, HasV5T]> {
+ Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> {
bits<4> func;
let Inst{31-4} = 0b1110000100101111111111110011;
let Inst{3-0} = func;
@@ -2009,7 +2012,7 @@ let isCall = 1,
def BLX_pred : AI<(outs), (ins GPR:$func), BrMiscFrm,
IIC_Br, "blx", "\t$func",
[(ARMcall_pred GPR:$func)]>,
- Requires<[IsARM, HasV5T]> {
+ Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> {
bits<4> func;
let Inst{27-4} = 0b000100101111111111110011;
let Inst{3-0} = func;
@@ -2019,18 +2022,18 @@ let isCall = 1,
// Note: Restrict $func to the tGPR regclass to prevent it being in LR.
def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func),
8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, HasV4T]>;
+ Requires<[IsARM, HasV4T]>, Sched<[WriteBr]>;
// ARMv4
def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func),
8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, NoV4T]>;
+ Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>;
// mov lr, pc; b if callee is marked noreturn to avoid confusing the
// return stack predictor.
def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins bl_target:$func),
8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
- Requires<[IsARM]>;
+ Requires<[IsARM]>, Sched<[WriteBr]>;
}
let isBranch = 1, isTerminator = 1 in {
@@ -2038,7 +2041,8 @@ let isBranch = 1, isTerminator = 1 in {
// a two-value operand where a dag node expects two operands. :(
def Bcc : ABI<0b1010, (outs), (ins br_target:$target),
IIC_Br, "b", "\t$target",
- [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> {
+ [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>,
+ Sched<[WriteBr]> {
bits<24> target;
let Inst{23-0} = target;
let DecoderMethod = "DecodeBranchImmInstruction";
@@ -2051,25 +2055,27 @@ let isBranch = 1, isTerminator = 1 in {
// should be sufficient.
// FIXME: Is B really a Barrier? That doesn't seem right.
def B : ARMPseudoExpand<(outs), (ins br_target:$target), 4, IIC_Br,
- [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>;
+ [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>,
+ Sched<[WriteBr]>;
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def BR_JTr : ARMPseudoInst<(outs),
(ins GPR:$target, i32imm:$jt, i32imm:$id),
0, IIC_Br,
- [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>;
+ [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>,
+ Sched<[WriteBr]>;
// FIXME: This shouldn't use the generic "addrmode2," but rather be split
// into i12 and rs suffixed versions.
def BR_JTm : ARMPseudoInst<(outs),
(ins addrmode2:$target, i32imm:$jt, i32imm:$id),
0, IIC_Br,
[(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
- imm:$id)]>;
+ imm:$id)]>, Sched<[WriteBrTbl]>;
def BR_JTadd : ARMPseudoInst<(outs),
(ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id),
0, IIC_Br,
[(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
- imm:$id)]>;
+ imm:$id)]>, Sched<[WriteBrTbl]>;
} // isNotDuplicable = 1, isIndirectBranch = 1
} // isBarrier = 1
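Note the split above: the register-indirect BR_JTr keeps WriteBr, while
the memory-indexed BR_JTm and the add-form BR_JTadd take WriteBrTbl, so
a model can charge jump-table dispatch separately from a plain branch.
The tokens only become cycle counts once a processor declares a model; a
shape-only sketch, with MyModel and all numbers invented:

  def MyModel : SchedMachineModel {
    let IssueWidth = 2;
    let MispredictPenalty = 14;
  }
  def : ProcessorModel<"mycpu", MyModel, []>;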
@@ -2078,7 +2084,7 @@ let isBranch = 1, isTerminator = 1 in {
// BLX (immediate)
def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary,
"blx\t$target", []>,
- Requires<[IsARM, HasV5T]> {
+ Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> {
let Inst{31-25} = 0b1111101;
bits<25> target;
let Inst{23-0} = target{24-1};
@@ -2087,7 +2093,7 @@ def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary,
// Branch and Exchange Jazelle
def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
- [/* pattern left blank */]> {
+ [/* pattern left blank */]>, Sched<[WriteBr]> {
bits<4> func;
let Inst{23-20} = 0b0010;
let Inst{19-8} = 0xfff;
@@ -2098,18 +2104,20 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
// Tail calls.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
- def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>;
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>,
+ Sched<[WriteBr]>;
- def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>;
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>,
+ Sched<[WriteBr]>;
def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst),
4, IIC_Br, [],
(Bcc br_target:$dst, (ops 14, zero_reg))>,
- Requires<[IsARM]>;
+ Requires<[IsARM]>, Sched<[WriteBr]>;
def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst),
4, IIC_Br, [],
- (BX GPR:$dst)>,
+ (BX GPR:$dst)>, Sched<[WriteBr]>,
Requires<[IsARM]>;
}
@@ -2123,7 +2131,8 @@ def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
// Supervisor Call (Software Interrupt)
let isCall = 1, Uses = [SP] in {
-def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []> {
+def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []>,
+ Sched<[WriteBr]> {
bits<24> svc;
let Inst{23-0} = svc;
}
@@ -2955,7 +2964,7 @@ defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m,
let neverHasSideEffects = 1 in
def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
- "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
bits<4> Rm;
@@ -2969,7 +2978,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
// A version for the smaller set of tail call registers.
let neverHasSideEffects = 1 in
def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
- IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
bits<4> Rm;
@@ -2982,7 +2991,8 @@ def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src),
DPSoRegRegFrm, IIC_iMOVsr,
"mov", "\t$Rd, $src",
- [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP {
+ [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP,
+ Sched<[WriteALU]> {
bits<4> Rd;
bits<12> src;
let Inst{15-12} = Rd;
@@ -2998,7 +3008,7 @@ def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src),
def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src),
DPSoRegImmFrm, IIC_iMOVsr,
"mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg_imm:$src)]>,
- UnaryDP {
+ UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
bits<12> src;
let Inst{15-12} = Rd;
@@ -3011,7 +3021,8 @@ def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src),
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
- "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP {
+ "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP,
+ Sched<[WriteALU]> {
bits<4> Rd;
bits<12> imm;
let Inst{25} = 1;
@@ -3025,7 +3036,7 @@ def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins imm0_65535_expr:$imm),
DPFrm, IIC_iMOVi,
"movw", "\t$Rd, $imm",
[(set GPR:$Rd, imm0_65535:$imm)]>,
- Requires<[IsARM, HasV6T2]>, UnaryDP {
+ Requires<[IsARM, HasV6T2]>, UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
bits<16> imm;
let Inst{15-12} = Rd;
@@ -3041,7 +3052,8 @@ def : InstAlias<"mov${p} $Rd, $imm",
Requires<[IsARM]>;
def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
- (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
+ Sched<[WriteALU]>;
let Constraints = "$src = $Rd" in {
def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd),
@@ -3051,7 +3063,7 @@ def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd),
[(set GPRnopc:$Rd,
(or (and GPR:$src, 0xffff),
lo16AllZero:$imm))]>, UnaryDP,
- Requires<[IsARM, HasV6T2]> {
+ Requires<[IsARM, HasV6T2]>, Sched<[WriteALU]> {
bits<4> Rd;
bits<16> imm;
let Inst{15-12} = Rd;
@@ -3063,7 +3075,8 @@ def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd),
}
def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
- (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+ (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
+ Sched<[WriteALU]>;
} // Constraints
@@ -3073,7 +3086,7 @@ def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
let Uses = [CPSR] in
def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
[(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
- Requires<[IsARM]>;
+ Requires<[IsARM]>, Sched<[WriteALU]>;
// These aren't really mov instructions, but we have to define them this way
// due to flag operands.
@@ -3081,10 +3094,10 @@ def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
let Defs = [CPSR] in {
def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
[(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP,
- Requires<[IsARM]>;
+ Sched<[WriteALU]>, Requires<[IsARM]>;
def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
[(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP,
- Requires<[IsARM]>;
+ Sched<[WriteALU]>, Requires<[IsARM]>;
}
//===----------------------------------------------------------------------===//
@@ -3250,7 +3263,8 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
list<dag> pattern = [],
dag iops = (ins GPRnopc:$Rn, GPRnopc:$Rm),
string asm = "\t$Rd, $Rn, $Rm">
- : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> {
+ : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rn;
bits<4> Rd;
bits<4> Rm;
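The token order in Sched<[WriteALU, ReadALU, ReadALU]> mirrors the
operand order, defs first: WriteALU pairs with $Rd, and the two ReadALU
entries with $Rn and $Rm. The same shape on a standalone pseudo
(EXAMPLE_ADD is hypothetical, not part of this patch):

  def EXAMPLE_ADD : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
                                  4, IIC_iALUr, []>,
                    Sched<[WriteALU, ReadALU, ReadALU]>;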
@@ -3265,9 +3279,11 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
// Saturating add/subtract
+let DecoderMethod = "DecodeQADDInstruction" in
def QADD : AAI<0b00010000, 0b00000101, "qadd",
[(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))],
(ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
+
def QSUB : AAI<0b00010010, 0b00000101, "qsub",
[(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))],
(ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
@@ -3326,7 +3342,7 @@ def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">;
def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
MulFrm /* for convenience */, NoItinerary, "usad8",
"\t$Rd, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -3340,7 +3356,7 @@ def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
MulFrm /* for convenience */, NoItinerary, "usada8",
"\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -3473,7 +3489,7 @@ def BFI:I<(outs GPRnopc:$Rd), (ins GPRnopc:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
"mvn", "\t$Rd, $Rm",
- [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP {
+ [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
bits<4> Rm;
let Inst{25} = 0;
@@ -3484,7 +3500,8 @@ def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
}
def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift),
DPSoRegImmFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift",
- [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP {
+ [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP,
+ Sched<[WriteALU]> {
bits<4> Rd;
bits<12> shift;
let Inst{25} = 0;
@@ -3496,7 +3513,8 @@ def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift),
}
def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift),
DPSoRegRegFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift",
- [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP {
+ [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP,
+ Sched<[WriteALU]> {
bits<4> Rd;
bits<12> shift;
let Inst{25} = 0;
@@ -3511,7 +3529,7 @@ def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift),
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
IIC_iMVNi, "mvn", "\t$Rd, $imm",
+ [(set GPR:$Rd, so_imm_not:$imm)]>, UnaryDP, Sched<[WriteALU]> {
+ [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
bits<12> imm;
let Inst{25} = 1;
@@ -4022,7 +4040,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs),
let isCompare = 1, Defs = [CPSR] in {
def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi,
"cmn", "\t$Rn, $imm",
- [(ARMcmn GPR:$Rn, so_imm:$imm)]> {
+ [(ARMcmn GPR:$Rn, so_imm:$imm)]>,
+ Sched<[WriteCMP, ReadALU]> {
bits<4> Rn;
bits<12> imm;
let Inst{25} = 1;
@@ -4038,7 +4057,7 @@ def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi,
def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr,
"cmn", "\t$Rn, $Rm",
[(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
- GPR:$Rn, GPR:$Rm)]> {
+ GPR:$Rn, GPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> {
bits<4> Rn;
bits<4> Rm;
let isCommutable = 1;
@@ -4056,7 +4075,8 @@ def CMNzrsi : AI1<0b1011, (outs),
(ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, IIC_iCMPsr,
"cmn", "\t$Rn, $shift",
[(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
- GPR:$Rn, so_reg_imm:$shift)]> {
+ GPR:$Rn, so_reg_imm:$shift)]>,
+ Sched<[WriteCMPsi, ReadALU]> {
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
@@ -4074,7 +4094,8 @@ def CMNzrsr : AI1<0b1011, (outs),
(ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iCMPsr,
"cmn", "\t$Rn, $shift",
[(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
- GPRnopc:$Rn, so_reg_reg:$shift)]> {
+ GPRnopc:$Rn, so_reg_reg:$shift)]>,
+ Sched<[WriteCMPsr, ReadALU]> {
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
@@ -4112,11 +4133,13 @@ let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
def BCCi64 : PseudoInst<(outs),
(ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
IIC_Br,
- [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>,
+ Sched<[WriteBr]>;
def BCCZi64 : PseudoInst<(outs),
(ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br,
- [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>,
+ Sched<[WriteBr]>;
} // usesCustomInserter
@@ -4129,20 +4152,20 @@ let isCommutable = 1, isSelect = 1 in
def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
4, IIC_iCMOVr,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">;
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$false, so_reg_imm:$shift, pred:$p),
4, IIC_iCMOVsr,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_imm:$shift,
imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">;
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$false, so_reg_reg:$shift, pred:$p),
4, IIC_iCMOVsr,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift,
imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">;
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
let isMoveImm = 1 in
@@ -4150,14 +4173,15 @@ def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$false, imm0_65535_expr:$imm, pred:$p),
4, IIC_iMOVi,
[]>,
- RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
+ RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>,
+ Sched<[WriteALU]>;
let isMoveImm = 1 in
def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$false, so_imm:$imm, pred:$p),
4, IIC_iCMOVi,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">;
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
// Two instruction predicate mov immediate.
let isMoveImm = 1 in
@@ -4170,7 +4194,7 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$false, so_imm:$imm, pred:$p),
4, IIC_iCMOVi,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">;
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
} // neverHasSideEffects
@@ -4189,6 +4213,16 @@ def memb_opt : Operand<i32> {
let DecoderMethod = "DecodeMemBarrierOption";
}
+def InstSyncBarrierOptOperand : AsmOperandClass {
+ let Name = "InstSyncBarrierOpt";
+ let ParserMethod = "parseInstSyncBarrierOptOperand";
+}
+def instsyncb_opt : Operand<i32> {
+ let PrintMethod = "printInstSyncBOption";
+ let ParserMatchClass = InstSyncBarrierOptOperand;
+ let DecoderMethod = "DecodeInstSyncBarrierOption";
+}
+
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
@@ -4209,7 +4243,7 @@ def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
}
// ISB has only full system option
-def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary,
"isb", "\t$opt", []>,
Requires<[IsARM, HasDB]> {
bits<4> opt;
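ISB only supports the full-system (SY) option, so it gets a dedicated
operand class rather than reusing the DMB/DSB one; the parser, printer
and decoder hooks named above can then reject the other encodings. A
hypothetical convenience alias, assuming SY encodes as 0xF as it does
for DMB (this alias is not part of the patch):

  def : InstAlias<"isb", (ISB 0xF)>, Requires<[IsARM, HasDB]>;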
@@ -4636,11 +4670,11 @@ def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
(MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
- (outs GPR:$Rt),
+ (outs GPRwithAPSR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>;
def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
- (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ (MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
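With Rt = 0b1111, MRC transfers the condition flags rather than a core
register, so the destination class widens from GPR to one that also
contains APSR_NZCV. A sketch of what GPRwithAPSR presumably looks like
(the real definition is in ARMRegisterInfo.td, also touched by this
commit; the exact membership here is an assumption):

  def GPRwithAPSR : RegisterClass<"ARM", [i32], 32,
                                  (add (sub GPR, PC), APSR_NZCV)>;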
@@ -4650,7 +4684,7 @@ class MovRCopro2<string opc, bit direction, dag oops, dag iops,
list<dag> pattern>
: ABXI<0b1110, oops, iops, NoItinerary,
!strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), pattern> {
- let Inst{31-28} = 0b1111;
+ let Inst{31-24} = 0b11111110;
let Inst{20} = direction;
let Inst{4} = 1;
@@ -4679,11 +4713,11 @@ def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm",
(MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0)>;
def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
- (outs GPR:$Rt),
+ (outs GPRwithAPSR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>;
def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm",
- (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0)>;
def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
@@ -4820,7 +4854,7 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
let isCall = 1,
Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
- [(set R0, ARMthread_pointer)]>;
+ [(set R0, ARMthread_pointer)]>, Sched<[WriteBr]>;
}
//===----------------------------------------------------------------------===//
@@ -4884,7 +4918,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
def MOVPCRX : ARMPseudoExpand<(outs), (ins GPR:$dst),
4, IIC_Br, [(brind GPR:$dst)],
(MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>,
- Requires<[IsARM, NoV4T]>;
+ Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>;
// Large immediate handling.
@@ -5233,7 +5267,7 @@ def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm",
cc_out:$s)>;
}
def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm",
- (ins GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+ (ins GPR:$Rd, GPR:$Rm, pred:$p, cc_out:$s)>;
let TwoOperandAliasConstraint = "$Rn = $Rd" in {
def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm",
(ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 896fd0f785..9d1a8ea38b 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -626,7 +626,7 @@ class VLD1D<bits<4> op7_4, string Dt>
"vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
@@ -634,7 +634,7 @@ class VLD1Q<bits<4> op7_4, string Dt>
"vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def VLD1d8 : VLD1D<{0,0,0,?}, "8">;
@@ -655,7 +655,7 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
@@ -663,7 +663,7 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt> {
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
let AsmMatchConverter = "cvtVLDwbRegister";
}
}
@@ -674,7 +674,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
@@ -682,7 +682,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> {
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
let AsmMatchConverter = "cvtVLDwbRegister";
}
}
@@ -703,7 +703,7 @@ class VLD1D3<bits<4> op7_4, string Dt>
"$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
@@ -712,7 +712,7 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
@@ -720,7 +720,7 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
let AsmMatchConverter = "cvtVLDwbRegister";
}
}
@@ -744,7 +744,7 @@ class VLD1D4<bits<4> op7_4, string Dt>
"$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
@@ -753,7 +753,7 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
@@ -761,7 +761,7 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
let AsmMatchConverter = "cvtVLDwbRegister";
}
}
@@ -786,7 +786,7 @@ class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
"vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
@@ -810,7 +810,7 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST2Instruction";
let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
@@ -818,7 +818,7 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
"vld2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST2Instruction";
let AsmMatchConverter = "cvtVLDwbRegister";
}
}
@@ -853,7 +853,7 @@ class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST3Instruction";
}
def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
@@ -872,7 +872,7 @@ class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST3Instruction";
}
def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
@@ -912,7 +912,7 @@ class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST4Instruction";
}
def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
@@ -931,7 +931,7 @@ class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST4Instruction";
}
def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
@@ -1580,14 +1580,14 @@ class VST1D<bits<4> op7_4, string Dt>
IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def VST1d8 : VST1D<{0,0,0,?}, "8">;
@@ -1608,7 +1608,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
let AsmMatchConverter = "cvtVSTwbFixed";
}
def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
@@ -1617,7 +1617,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> {
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
let AsmMatchConverter = "cvtVSTwbRegister";
}
}
@@ -1628,7 +1628,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
let AsmMatchConverter = "cvtVSTwbFixed";
}
def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
@@ -1637,7 +1637,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> {
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
let AsmMatchConverter = "cvtVSTwbRegister";
}
}
@@ -1659,7 +1659,7 @@ class VST1D3<bits<4> op7_4, string Dt>
IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
@@ -1668,7 +1668,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
let AsmMatchConverter = "cvtVSTwbFixed";
}
def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
@@ -1677,7 +1677,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> {
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
let AsmMatchConverter = "cvtVSTwbRegister";
}
}
@@ -1704,7 +1704,7 @@ class VST1D4<bits<4> op7_4, string Dt>
[]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
@@ -1713,7 +1713,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
let AsmMatchConverter = "cvtVSTwbFixed";
}
def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
@@ -1722,7 +1722,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> {
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
let AsmMatchConverter = "cvtVSTwbRegister";
}
}
@@ -1748,7 +1748,7 @@ class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
itin, "vst2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>;
@@ -1772,7 +1772,7 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST2Instruction";
let AsmMatchConverter = "cvtVSTwbFixed";
}
def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
@@ -1780,7 +1780,7 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
"vst2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST2Instruction";
let AsmMatchConverter = "cvtVSTwbRegister";
}
}
@@ -1791,7 +1791,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST2Instruction";
let AsmMatchConverter = "cvtVSTwbFixed";
}
def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
@@ -1800,7 +1800,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> {
"vst2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST2Instruction";
let AsmMatchConverter = "cvtVSTwbRegister";
}
}
@@ -1835,7 +1835,7 @@ class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST3Instruction";
}
def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
@@ -1854,7 +1854,7 @@ class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST3Instruction";
}
def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
@@ -1894,7 +1894,7 @@ class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
"", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST4Instruction";
}
def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
@@ -1913,7 +1913,7 @@ class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST4Instruction";
}
def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
@@ -5509,8 +5509,9 @@ class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
(Ty DPR:$Vm), imm:$index)))]> {
- bits<4> index;
- let Inst{11-8} = index{3-0};
+ bits<3> index;
+ let Inst{11} = 0b0;
+ let Inst{10-8} = index{2-0};
}
class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
@@ -5525,14 +5526,14 @@ class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
}
def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
- let Inst{11-8} = index{3-0};
+ let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
- let Inst{11-9} = index{2-0};
+ let Inst{10-9} = index{1-0};
let Inst{8} = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
- let Inst{11-10} = index{1-0};
+ let Inst{10} = index{0};
let Inst{9-8} = 0b00;
}
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
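VEXTd operates on 64-bit D registers, so a byte index needs only three
bits (0-7) and bit 11 of the encoding must stay zero; the quadword
variants keep the full four-bit field. For contrast, the unchanged
quadword counterpart looks roughly like this (reconstructed from the
surrounding code, not part of the diff):

  def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
    let Inst{11-8} = index{3-0};
  }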
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index ae7a5c00bd..1fff41db27 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -310,7 +310,7 @@ def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags),
let isNotDuplicable = 1, isCodeGenOnly = 1 in
def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "",
[(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>,
- T1Special<{0,0,?,?}> {
+ T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
// A8.6.6
bits<3> dst;
let Inst{6-3} = 0b1111; // Rm = pc
@@ -323,7 +323,7 @@ def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "",
// probably because the instruction can be moved around.
def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm),
IIC_iALUi, "add", "\t$dst, $sp, $imm", []>,
- T1Encoding<{1,0,1,0,1,?}> {
+ T1Encoding<{1,0,1,0,1,?}>, Sched<[WriteALU]> {
// A6.2 & A8.6.8
bits<3> dst;
bits<8> imm;
@@ -335,7 +335,7 @@ def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm),
// ADD sp, sp, #<imm7>
def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
IIC_iALUi, "add", "\t$Rdn, $imm", []>,
- T1Misc<{0,0,0,0,0,?,?}> {
+ T1Misc<{0,0,0,0,0,?,?}>, Sched<[WriteALU]> {
// A6.2.5 & A8.6.8
bits<7> imm;
let Inst{6-0} = imm;
@@ -346,7 +346,7 @@ def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
// FIXME: The encoding and the ASM string don't match up.
def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
IIC_iALUi, "sub", "\t$Rdn, $imm", []>,
- T1Misc<{0,0,0,0,1,?,?}> {
+ T1Misc<{0,0,0,0,1,?,?}>, Sched<[WriteALU]> {
// A6.2.5 & A8.6.214
bits<7> imm;
let Inst{6-0} = imm;
@@ -367,7 +367,7 @@ def : tInstAlias<"sub${p} sp, sp, $imm",
// ADD <Rm>, sp
def tADDrSP : T1pI<(outs GPR:$Rdn), (ins GPRsp:$sp, GPR:$Rn), IIC_iALUr,
"add", "\t$Rdn, $sp, $Rn", []>,
- T1Special<{0,0,?,?}> {
+ T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
// A8.6.9 Encoding T1
bits<4> Rdn;
let Inst{7} = Rdn{3};
@@ -379,7 +379,7 @@ def tADDrSP : T1pI<(outs GPR:$Rdn), (ins GPRsp:$sp, GPR:$Rn), IIC_iALUr,
// ADD sp, <Rm>
def tADDspr : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, GPR:$Rm), IIC_iALUr,
"add", "\t$Rdn, $Rm", []>,
- T1Special<{0,0,?,?}> {
+ T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
// A8.6.9 Encoding T2
bits<4> Rm;
let Inst{7} = 1;
@@ -395,7 +395,7 @@ def tADDspr : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, GPR:$Rm), IIC_iALUr,
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def tBX : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bx${p}\t$Rm", []>,
- T1Special<{1,1,0,?}> {
+ T1Special<{1,1,0,?}>, Sched<[WriteBr]> {
// A6.2.3 & A8.6.25
bits<4> Rm;
let Inst{6-3} = Rm;
@@ -406,12 +406,12 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def tBX_RET : tPseudoExpand<(outs), (ins pred:$p), 2, IIC_Br,
- [(ARMretflag)], (tBX LR, pred:$p)>;
+ [(ARMretflag)], (tBX LR, pred:$p)>, Sched<[WriteBr]>;
// Alternative return instruction used by vararg functions.
def tBX_RET_vararg : tPseudoExpand<(outs), (ins tGPR:$Rm, pred:$p),
2, IIC_Br, [],
- (tBX GPR:$Rm, pred:$p)>;
+ (tBX GPR:$Rm, pred:$p)>, Sched<[WriteBr]>;
}
// All calls clobber the non-callee saved registers. SP is marked as a use to
@@ -424,7 +424,7 @@ let isCall = 1,
(outs), (ins pred:$p, t_bltarget:$func), IIC_Br,
"bl${p}\t$func",
[(ARMtcall tglobaladdr:$func)]>,
- Requires<[IsThumb]> {
+ Requires<[IsThumb]>, Sched<[WriteBrL]> {
bits<24> func;
let Inst{26} = func{23};
let Inst{25-16} = func{20-11};
@@ -438,7 +438,7 @@ let isCall = 1,
(outs), (ins pred:$p, t_blxtarget:$func), IIC_Br,
"blx${p}\t$func",
[(ARMcall tglobaladdr:$func)]>,
- Requires<[IsThumb, HasV5T]> {
+ Requires<[IsThumb, HasV5T]>, Sched<[WriteBrL]> {
bits<24> func;
let Inst{26} = func{23};
let Inst{25-16} = func{20-11};
@@ -453,7 +453,7 @@ let isCall = 1,
"blx${p}\t$func",
[(ARMtcall GPR:$func)]>,
Requires<[IsThumb, HasV5T]>,
- T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24;
+ T1Special<{1,1,1,?}>, Sched<[WriteBrL]> { // A6.2.3 & A8.6.24;
bits<4> func;
let Inst{6-3} = func;
let Inst{2-0} = 0b000;
@@ -463,14 +463,14 @@ let isCall = 1,
def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func),
4, IIC_Br,
[(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsThumb, IsThumb1Only]>;
+ Requires<[IsThumb, IsThumb1Only]>, Sched<[WriteBr]>;
}
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isPredicable = 1 in
def tB : T1pI<(outs), (ins t_brtarget:$target), IIC_Br,
"b", "\t$target", [(br bb:$target)]>,
- T1Encoding<{1,1,1,0,0,?}> {
+ T1Encoding<{1,1,1,0,0,?}>, Sched<[WriteBr]> {
bits<11> target;
let Inst{10-0} = target;
}
@@ -480,12 +480,14 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
// the clobber of LR.
let Defs = [LR] in
def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target, pred:$p),
- 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>;
+ 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>,
+ Sched<[WriteBrTbl]>;
def tBR_JTr : tPseudoInst<(outs),
(ins tGPR:$target, i32imm:$jt, i32imm:$id),
0, IIC_Br,
- [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]> {
+ [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>,
+ Sched<[WriteBrTbl]> {
list<Predicate> Predicates = [IsThumb, IsThumb1Only];
}
}
@@ -496,7 +498,7 @@ let isBranch = 1, isTerminator = 1 in
def tBcc : T1I<(outs), (ins t_bcctarget:$target, pred:$p), IIC_Br,
"b${p}\t$target",
[/*(ARMbrcond bb:$target, imm:$cc)*/]>,
- T1BranchCond<{1,1,0,1}> {
+ T1BranchCond<{1,1,0,1}>, Sched<[WriteBr]> {
bits<4> p;
bits<8> target;
let Inst{11-8} = p;
@@ -510,7 +512,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst),
4, IIC_Br, [],
(tBX GPR:$dst, (ops 14, zero_reg))>,
- Requires<[IsThumb]>;
+ Requires<[IsThumb]>, Sched<[WriteBr]>;
}
// tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls
// on IOS), so it's in ARMInstrThumb2.td.
@@ -520,7 +522,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
(ins t_brtarget:$dst, pred:$p),
4, IIC_Br, [],
(tB t_brtarget:$dst, pred:$p)>,
- Requires<[IsThumb, IsNotIOS]>;
+ Requires<[IsThumb, IsNotIOS]>, Sched<[WriteBr]>;
}
}
@@ -530,7 +532,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// If Inst{11-8} == 0b1111 then SEE SVC
let isCall = 1, Uses = [SP] in
def tSVC : T1pI<(outs), (ins imm0_255:$imm), IIC_Br,
- "svc", "\t$imm", []>, Encoding16 {
+ "svc", "\t$imm", []>, Encoding16, Sched<[WriteBr]> {
bits<8> imm;
let Inst{15-12} = 0b1101;
let Inst{11-8} = 0b1111;
@@ -540,7 +542,7 @@ def tSVC : T1pI<(outs), (ins imm0_255:$imm), IIC_Br,
// The assembler uses 0xDEFE for a trap instruction.
let isBarrier = 1, isTerminator = 1 in
def tTRAP : TI<(outs), (ins), IIC_Br,
- "trap", [(trap)]>, Encoding16 {
+ "trap", [(trap)]>, Encoding16, Sched<[WriteBr]> {
let Inst = 0xdefe;
}
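The Thumb1 definitions reuse the same generic tokens as the ARM-mode
file, so a single per-CPU model covers all three instruction sets. A
sketch of the per-model binding, reusing the invented MyModel from above
together with an invented MyUnitBr resource:

  let SchedModel = MyModel in {
    def MyUnitBr : ProcResource<1>;
    def : WriteRes<WriteBr, [MyUnitBr]> { let Latency = 1; }
  }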
@@ -833,14 +835,15 @@ let isCommutable = 1, Uses = [CPSR] in
def tADC : // A8.6.2
T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
"adc", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
// Add immediate
def tADDi3 : // A8.6.4 T1
T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
IIC_iALUi,
"add", "\t$Rd, $Rm, $imm3",
- [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> {
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]>,
+ Sched<[WriteALU]> {
bits<3> imm3;
let Inst{8-6} = imm3;
}
@@ -849,7 +852,8 @@ def tADDi8 : // A8.6.4 T2
T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn),
(ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
"add", "\t$Rdn, $imm8",
- [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>;
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>,
+ Sched<[WriteALU]>;
// Add register
let isCommutable = 1 in
@@ -857,12 +861,12 @@ def tADDrr : // A8.6.6 T1
T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iALUr,
"add", "\t$Rd, $Rn, $Rm",
- [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
let neverHasSideEffects = 1 in
def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
"add", "\t$Rdn, $Rm", []>,
- T1Special<{0,0,?,?}> {
+ T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
// A8.6.6 T2
bits<4> Rdn;
bits<4> Rm;
@@ -877,14 +881,15 @@ def tAND : // A8.6.12
T1sItDPEncode<0b0000, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iBITr,
"and", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
// ASR immediate
def tASRri : // A8.6.14
T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5),
IIC_iMOVsi,
"asr", "\t$Rd, $Rm, $imm5",
- [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm_sr:$imm5)))]> {
+ [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm_sr:$imm5)))]>,
+ Sched<[WriteALU]> {
bits<5> imm5;
let Inst{10-6} = imm5;
}
@@ -894,14 +899,15 @@ def tASRrr : // A8.6.15
T1sItDPEncode<0b0100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iMOVsr,
"asr", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
// BIC register
def tBIC : // A8.6.20
T1sItDPEncode<0b1110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iBITr,
"bic", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>;
+ [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>,
+ Sched<[WriteALU]>;
// CMN register
let isCompare = 1, Defs = [CPSR] in {
@@ -917,7 +923,7 @@ def tCMNz : // A8.6.33
T1pIDPEncode<0b1011, (outs), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iCMPr,
"cmn", "\t$Rn, $Rm",
- [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>;
+ [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>, Sched<[WriteCMP]>;
} // isCompare = 1, Defs = [CPSR]
@@ -926,7 +932,7 @@ let isCompare = 1, Defs = [CPSR] in {
def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iCMPi,
"cmp", "\t$Rn, $imm8",
[(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>,
- T1General<{1,0,1,?,?}> {
+ T1General<{1,0,1,?,?}>, Sched<[WriteCMP]> {
// A8.6.35
bits<3> Rn;
bits<8> imm8;
@@ -939,11 +945,11 @@ def tCMPr : // A8.6.36 T1
T1pIDPEncode<0b1010, (outs), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iCMPr,
"cmp", "\t$Rn, $Rm",
- [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>;
+ [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>, Sched<[WriteCMP]>;
def tCMPhir : T1pI<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_iCMPr,
"cmp", "\t$Rn, $Rm", []>,
- T1Special<{0,1,?,?}> {
+ T1Special<{0,1,?,?}>, Sched<[WriteCMP]> {
// A8.6.36 T2
bits<4> Rm;
bits<4> Rn;
@@ -960,14 +966,15 @@ def tEOR : // A8.6.45
T1sItDPEncode<0b0001, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iBITr,
"eor", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
// LSL immediate
def tLSLri : // A8.6.88
T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_31:$imm5),
IIC_iMOVsi,
"lsl", "\t$Rd, $Rm, $imm5",
- [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]> {
+ [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]>,
+ Sched<[WriteALU]> {
bits<5> imm5;
let Inst{10-6} = imm5;
}
@@ -977,14 +984,15 @@ def tLSLrr : // A8.6.89
T1sItDPEncode<0b0010, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iMOVsr,
"lsl", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
// LSR immediate
def tLSRri : // A8.6.90
T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5),
IIC_iMOVsi,
"lsr", "\t$Rd, $Rm, $imm5",
- [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm_sr:$imm5)))]> {
+ [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm_sr:$imm5)))]>,
+ Sched<[WriteALU]> {
bits<5> imm5;
let Inst{10-6} = imm5;
}
@@ -994,14 +1002,14 @@ def tLSRrr : // A8.6.91
T1sItDPEncode<0b0011, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iMOVsr,
"lsr", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
// Move register
let isMoveImm = 1 in
def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi,
"mov", "\t$Rd, $imm8",
[(set tGPR:$Rd, imm0_255:$imm8)]>,
- T1General<{1,0,0,?,?}> {
+ T1General<{1,0,0,?,?}>, Sched<[WriteALU]> {
// A8.6.96
bits<3> Rd;
bits<8> imm8;
@@ -1019,7 +1027,7 @@ let neverHasSideEffects = 1 in {
def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone,
2, IIC_iMOVr,
"mov", "\t$Rd, $Rm", "", []>,
- T1Special<{1,0,?,?}> {
+ T1Special<{1,0,?,?}>, Sched<[WriteALU]> {
// A8.6.97
bits<4> Rd;
bits<4> Rm;
@@ -1029,7 +1037,7 @@ def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone,
}
let Defs = [CPSR] in
def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
- "movs\t$Rd, $Rm", []>, Encoding16 {
+ "movs\t$Rd, $Rm", []>, Encoding16, Sched<[WriteALU]> {
// A8.6.97
bits<3> Rd;
bits<3> Rm;
@@ -1060,7 +1068,7 @@ def :tInstAlias<"mul${s}${p} $Rdm, $Rn", (tMUL tGPR:$Rdm, s_cc_out:$s, tGPR:$Rn,
def tMVN : // A8.6.107
T1sIDPEncode<0b1111, (outs tGPR:$Rd), (ins tGPR:$Rn), IIC_iMVNr,
"mvn", "\t$Rd, $Rn",
- [(set tGPR:$Rd, (not tGPR:$Rn))]>;
+ [(set tGPR:$Rd, (not tGPR:$Rn))]>, Sched<[WriteALU]>;
// Bitwise or register
let isCommutable = 1 in
@@ -1068,7 +1076,7 @@ def tORR : // A8.6.114
T1sItDPEncode<0b1100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iBITr,
"orr", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
// Swaps
def tREV : // A8.6.134
@@ -1076,35 +1084,36 @@ def tREV : // A8.6.134
IIC_iUNAr,
"rev", "\t$Rd, $Rm",
[(set tGPR:$Rd, (bswap tGPR:$Rm))]>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
+ Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>;
def tREV16 : // A8.6.135
T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
IIC_iUNAr,
"rev16", "\t$Rd, $Rm",
[(set tGPR:$Rd, (rotr (bswap tGPR:$Rm), (i32 16)))]>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
+ Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>;
def tREVSH : // A8.6.136
T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
IIC_iUNAr,
"revsh", "\t$Rd, $Rm",
[(set tGPR:$Rd, (sra (bswap tGPR:$Rm), (i32 16)))]>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
+ Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>;
// Rotate right register
def tROR : // A8.6.139
T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iMOVsr,
"ror", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>,
+ Sched<[WriteALU]>;
// Negate register
def tRSB : // A8.6.141
T1sIDPEncode<0b1001, (outs tGPR:$Rd), (ins tGPR:$Rn),
IIC_iALUi,
"rsb", "\t$Rd, $Rn, #0",
- [(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
+ [(set tGPR:$Rd, (ineg tGPR:$Rn))]>, Sched<[WriteALU]>;
// Subtract with carry register
let Uses = [CPSR] in
@@ -1112,14 +1121,16 @@ def tSBC : // A8.6.151
T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iALUr,
"sbc", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>,
+ Sched<[WriteALU]>;
// Subtract immediate
def tSUBi3 : // A8.6.210 T1
T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
IIC_iALUi,
"sub", "\t$Rd, $Rm, $imm3",
- [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]> {
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]>,
+ Sched<[WriteALU]> {
bits<3> imm3;
let Inst{8-6} = imm3;
}
@@ -1128,14 +1139,16 @@ def tSUBi8 : // A8.6.210 T2
T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn),
(ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
"sub", "\t$Rdn, $imm8",
- [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>;
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>,
+ Sched<[WriteALU]>;
// Subtract register
def tSUBrr : // A8.6.212
T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iALUr,
"sub", "\t$Rd, $Rn, $Rm",
- [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>,
+ Sched<[WriteALU]>;
// Sign-extend byte
def tSXTB : // A8.6.222
@@ -1143,7 +1156,8 @@ def tSXTB : // A8.6.222
IIC_iUNAr,
"sxtb", "\t$Rd, $Rm",
[(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i8))]>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
+ Requires<[IsThumb, IsThumb1Only, HasV6]>,
+ Sched<[WriteALU]>;
// Sign-extend short
def tSXTH : // A8.6.224
@@ -1151,14 +1165,16 @@ def tSXTH : // A8.6.224
IIC_iUNAr,
"sxth", "\t$Rd, $Rm",
[(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i16))]>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
+ Requires<[IsThumb, IsThumb1Only, HasV6]>,
+ Sched<[WriteALU]>;
// Test
let isCompare = 1, isCommutable = 1, Defs = [CPSR] in
def tTST : // A8.6.230
T1pIDPEncode<0b1000, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iTSTr,
"tst", "\t$Rn, $Rm",
- [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>;
+ [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>,
+ Sched<[WriteALU]>;
// Zero-extend byte
def tUXTB : // A8.6.262
@@ -1166,7 +1182,8 @@ def tUXTB : // A8.6.262
IIC_iUNAr,
"uxtb", "\t$Rd, $Rm",
[(set tGPR:$Rd, (and tGPR:$Rm, 0xFF))]>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
+ Requires<[IsThumb, IsThumb1Only, HasV6]>,
+ Sched<[WriteALU]>;
// Zero-extend short
def tUXTH : // A8.6.264
@@ -1174,7 +1191,7 @@ def tUXTH : // A8.6.264
IIC_iUNAr,
"uxth", "\t$Rd, $Rm",
[(set tGPR:$Rd, (and tGPR:$Rm, 0xFFFF))]>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
+ Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>;
// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation.
// Expanded after instruction selection into a branch sequence.
@@ -1189,7 +1206,7 @@ let usesCustomInserter = 1 in // Expanded after instruction selection.
def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
IIC_iALUi, "adr{$p}\t$Rd, $addr", []>,
- T1Encoding<{1,0,1,0,0,?}> {
+ T1Encoding<{1,0,1,0,0,?}>, Sched<[WriteALU]> {
bits<3> Rd;
bits<8> addr;
let Inst{10-8} = Rd;
@@ -1199,12 +1216,12 @@ def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
let neverHasSideEffects = 1, isReMaterializable = 1 in
def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
- 2, IIC_iALUi, []>;
+ 2, IIC_iALUi, []>, Sched<[WriteALU]>;
let hasSideEffects = 1 in
def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
- 2, IIC_iALUi, []>;
+ 2, IIC_iALUi, []>, Sched<[WriteALU]>;
//===----------------------------------------------------------------------===//
// TLS Instructions
@@ -1215,7 +1232,8 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
// complete with fixup for the aeabi_read_tp function.
let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in
def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
- [(set R0, ARMthread_pointer)]>;
+ [(set R0, ARMthread_pointer)]>,
+ Sched<[WriteBr]>;
//===----------------------------------------------------------------------===//
// SJLJ Exception handling intrinsics
@@ -1381,13 +1399,13 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
hasExtraDefRegAllocReq = 1 in
def tPOP_RET : tPseudoExpand<(outs), (ins pred:$p, reglist:$regs, variable_ops),
2, IIC_iPop_Br, [],
- (tPOP pred:$p, reglist:$regs)>;
+ (tPOP pred:$p, reglist:$regs)>, Sched<[WriteBrL]>;
// Indirect branch using "mov pc, $Rm"
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def tBRIND : tPseudoExpand<(outs), (ins GPR:$Rm, pred:$p),
2, IIC_Br, [(brind GPR:$Rm)],
- (tMOVr PC, GPR:$Rm, pred:$p)>;
+ (tMOVr PC, GPR:$Rm, pred:$p)>, Sched<[WriteBr]>;
}
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 4dacb86df4..ff21bf70ec 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -554,7 +554,8 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
def ri : T2sTwoRegImm<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii,
opc, "\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]> {
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -563,7 +564,8 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
// register
def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), iir,
opc, !strconcat(wide, "\t$Rd, $Rn, $Rm"),
- [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -576,7 +578,8 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
def rs : T2sTwoRegShiftedReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), iis,
opc, !strconcat(wide, "\t$Rd, $Rn, $ShiftedRm"),
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]> {
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
+ Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -635,7 +638,8 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
def ri : T2sTwoRegImm<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
opc, ".w\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -645,7 +649,8 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
def rr : T2sThreeReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
opc, "\t$Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]> {
+ [/* For disassembly only; pattern left blank */]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -657,7 +662,8 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
def rs : T2sTwoRegShiftedReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
IIC_iALUsir, opc, "\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -678,12 +684,14 @@ multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
(ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p),
4, iii,
[(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
- t2_so_imm:$imm))]>;
+ t2_so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]>;
// register
def rr : t2PseudoInst<(outs rGPR:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm, pred:$p),
4, iir,
[(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
- rGPR:$Rm))]> {
+ rGPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
let isCommutable = Commutable;
}
// shifted register
@@ -691,7 +699,8 @@ multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
(ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
4, iis,
[(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
- t2_so_reg:$ShiftedRm))]>;
+ t2_so_reg:$ShiftedRm))]>,
+ Sched<[WriteALUsi, ReadALUsr]>;
}
}
@@ -704,13 +713,15 @@ multiclass T2I_rbin_s_is<PatFrag opnode> {
(ins rGPR:$Rn, t2_so_imm:$imm, pred:$p),
4, IIC_iALUi,
[(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm,
- rGPR:$Rn))]>;
+ rGPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]>;
// shifted register
def rs : t2PseudoInst<(outs rGPR:$Rd),
(ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
4, IIC_iALUsi,
[(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm,
- rGPR:$Rn))]>;
+ rGPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]>;
}
}
@@ -725,7 +736,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
def ri : T2sTwoRegImm<
(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi,
opc, ".w\t$Rd, $Rn, $imm",
- [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]> {
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24} = 1;
@@ -737,7 +749,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
def ri12 : T2I<
(outs GPRnopc:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi,
!strconcat(opc, "w"), "\t$Rd, $Rn, $imm",
- [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> {
+ [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -755,7 +768,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
// register
def rr : T2sThreeReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm),
IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm",
- [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, rGPR:$Rm))]> {
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, rGPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -769,7 +783,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
def rs : T2sTwoRegShiftedReg<
(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm),
IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
- [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]> {
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]>,
+ Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24} = 1;
@@ -787,7 +802,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
[(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_imm:$imm, CPSR))]>,
- Requires<[IsThumb2]> {
+ Requires<[IsThumb2]>, Sched<[WriteALU, ReadALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -797,7 +812,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
opc, ".w\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, rGPR:$Rm, CPSR))]>,
- Requires<[IsThumb2]> {
+ Requires<[IsThumb2]>, Sched<[WriteALU, ReadALU, ReadALU]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -811,7 +826,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
[(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm, CPSR))]>,
- Requires<[IsThumb2]> {
+ Requires<[IsThumb2]>, Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -826,7 +841,8 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> {
def ri : T2sTwoRegShiftImm<
(outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi,
opc, ".w\t$Rd, $Rm, $imm",
- [(set rGPR:$Rd, (opnode rGPR:$Rm, (i32 ty:$imm)))]> {
+ [(set rGPR:$Rd, (opnode rGPR:$Rm, (i32 ty:$imm)))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-21} = 0b010010;
let Inst{19-16} = 0b1111; // Rn
@@ -836,7 +852,8 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> {
def rr : T2sThreeReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMOVsr,
opc, ".w\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-21} = opcod;
@@ -880,7 +897,7 @@ let isCompare = 1, Defs = [CPSR] in {
def ri : T2OneRegCmpImm<
(outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), iii,
opc, ".w\t$Rn, $imm",
- [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]> {
+ [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]>, Sched<[WriteCMP]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -892,7 +909,7 @@ let isCompare = 1, Defs = [CPSR] in {
def rr : T2TwoRegCmp<
(outs), (ins GPRnopc:$Rn, rGPR:$Rm), iir,
opc, ".w\t$Rn, $Rm",
- [(opnode GPRnopc:$Rn, rGPR:$Rm)]> {
+ [(opnode GPRnopc:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -906,7 +923,8 @@ let isCompare = 1, Defs = [CPSR] in {
def rs : T2OneRegCmpShiftedReg<
(outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), iis,
opc, ".w\t$Rn, $ShiftedRm",
- [(opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> {
+ [(opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]>,
+ Sched<[WriteCMPsi]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -1167,7 +1185,8 @@ class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin,
// assembler.
def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
(ins t2adrlabel:$addr, pred:$p),
- IIC_iALUi, "adr{$p}.w\t$Rd, $addr", []> {
+ IIC_iALUi, "adr{$p}.w\t$Rd, $addr", []>,
+ Sched<[WriteALU, ReadALU]> {
let Inst{31-27} = 0b11110;
let Inst{25-24} = 0b10;
// Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
@@ -1190,12 +1209,12 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
let neverHasSideEffects = 1, isReMaterializable = 1 in
def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
- 4, IIC_iALUi, []>;
+ 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>;
let hasSideEffects = 1 in
def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
4, IIC_iALUi,
- []>;
+ []>, Sched<[WriteALU, ReadALU]>;
//===----------------------------------------------------------------------===//
@@ -1520,7 +1539,8 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc,
"\t$addr",
- [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]> {
+ [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]>,
+ Sched<[WritePreLd]> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
let Inst{22} = 0;
@@ -1537,7 +1557,8 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
def i8 : T2Ii8<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc,
"\t$addr",
- [(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]> {
+ [(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]>,
+ Sched<[WritePreLd]> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
let Inst{23} = 0; // U = 0
@@ -1554,7 +1575,8 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc,
"\t$addr",
- [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]> {
+ [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]>,
+ Sched<[WritePreLd]> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
let Inst{23} = 0; // add = TRUE for T1
@@ -1743,7 +1765,7 @@ defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
let neverHasSideEffects = 1 in
def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
- "mov", ".w\t$Rd, $Rm", []> {
+ "mov", ".w\t$Rd, $Rm", []>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1763,7 +1785,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
AddedComplexity = 1 in
def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi,
"mov", ".w\t$Rd, $imm",
- [(set rGPR:$Rd, t2_so_imm:$imm)]> {
+ [(set rGPR:$Rd, t2_so_imm:$imm)]>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = 0b0010;
@@ -1786,7 +1808,7 @@ def : t2InstAlias<"mov${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
"movw", "\t$Rd, $imm",
- [(set rGPR:$Rd, imm0_65535:$imm)]> {
+ [(set rGPR:$Rd, imm0_65535:$imm)]>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0010;
@@ -1812,7 +1834,8 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd),
(ins rGPR:$src, imm0_65535_expr:$imm), IIC_iMOVi,
"movt", "\t$Rd, $imm",
[(set rGPR:$Rd,
- (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> {
+ (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0110;
@@ -1831,7 +1854,8 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd),
}
def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
- (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+ (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
+ Sched<[WriteALU]>;
} // Constraints
def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
@@ -2171,7 +2195,7 @@ def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
let Uses = [CPSR] in {
def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
"rrx", "\t$Rd, $Rm",
- [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]> {
+ [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2185,7 +2209,8 @@ let isCodeGenOnly = 1, Defs = [CPSR] in {
def t2MOVsrl_flag : T2TwoRegShiftImm<
(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
"lsrs", ".w\t$Rd, $Rm, #1",
- [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]> {
+ [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2199,7 +2224,8 @@ def t2MOVsrl_flag : T2TwoRegShiftImm<
def t2MOVsra_flag : T2TwoRegShiftImm<
(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
"asrs", ".w\t$Rd, $Rm, #1",
- [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]> {
+ [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2320,7 +2346,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc,
// shifted imm
def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
opc, "\t$Rd, $imm",
- [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]>, Sched<[WriteALU]> {
let isAsCheapAsAMove = Cheap;
let isReMaterializable = ReMat;
let isMoveImm = MoveImm;
@@ -2333,7 +2359,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc,
// register
def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir,
opc, ".w\t$Rd, $Rm",
- [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -2345,7 +2371,8 @@ multiclass T2I_un_irs<bits<4> opcod, string opc,
// shifted register
def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis,
opc, ".w\t$Rd, $ShiftedRm",
- [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> {
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -2804,22 +2831,27 @@ class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops,
}
def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
- "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>;
+ "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>,
+ Sched<[WriteALU]>;
def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"rbit", "\t$Rd, $Rm",
- [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>;
+ [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>,
+ Sched<[WriteALU]>;
def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
- "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>;
+ "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>,
+ Sched<[WriteALU]>;
def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"rev16", ".w\t$Rd, $Rm",
- [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>;
+ [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>,
+ Sched<[WriteALU]>;
def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"revsh", ".w\t$Rd, $Rm",
- [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>;
+ [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>,
+ Sched<[WriteALU]>;
def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)),
(and (srl rGPR:$Rm, (i32 8)), 0xFF)),
@@ -2831,7 +2863,8 @@ def t2PKHBT : T2ThreeReg<
[(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF),
(and (shl rGPR:$Rm, pkh_lsl_amt:$sh),
0xFFFF0000)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
+ Requires<[HasT2ExtractPack, IsThumb2]>,
+ Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-20} = 0b01100;
@@ -2859,7 +2892,8 @@ def t2PKHTB : T2ThreeReg<
[(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000),
(and (sra rGPR:$Rm, pkh_asr_amt:$sh),
0xFFFF)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
+ Requires<[HasT2ExtractPack, IsThumb2]>,
+ Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-20} = 0b01100;
@@ -2900,7 +2934,8 @@ let isCompare = 1, Defs = [CPSR] in {
def t2CMNri : T2OneRegCmpImm<
(outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iCMPi,
"cmn", ".w\t$Rn, $imm",
- [(ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm))]> {
+ [(ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm))]>,
+ Sched<[WriteCMP, ReadALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = 0b1000;
@@ -2913,7 +2948,7 @@ let isCompare = 1, Defs = [CPSR] in {
(outs), (ins GPRnopc:$Rn, rGPR:$Rm), IIC_iCMPr,
"cmn", ".w\t$Rn, $Rm",
[(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
- GPRnopc:$Rn, rGPR:$Rm)]> {
+ GPRnopc:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b1000;
@@ -2928,7 +2963,8 @@ let isCompare = 1, Defs = [CPSR] in {
(outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), IIC_iCMPsi,
"cmn", ".w\t$Rn, $ShiftedRm",
[(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
- GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> {
+ GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]>,
+ Sched<[WriteCMPsi, ReadALU, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b1000;
@@ -2968,14 +3004,15 @@ def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
(ins rGPR:$false, rGPR:$Rm, pred:$p),
4, IIC_iCMOVr,
[/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">;
+ RegConstraint<"$false = $Rd">,
+ Sched<[WriteALU]>;
let isMoveImm = 1 in
def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd),
(ins rGPR:$false, t2_so_imm:$imm, pred:$p),
4, IIC_iCMOVi,
[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">;
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
// FIXME: Pseudo-ize these. For now, just mark codegen only.
let isCodeGenOnly = 1 in {
@@ -2983,7 +3020,7 @@ let isMoveImm = 1 in
def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, imm0_65535_expr:$imm),
IIC_iCMOVi,
"movw", "\t$Rd, $imm", []>,
- RegConstraint<"$false = $Rd"> {
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0010;
@@ -3010,7 +3047,7 @@ def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
IIC_iCMOVi, "mvn", "\t$Rd, $imm",
[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm,
imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd"> {
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = 0b0011;
@@ -3021,7 +3058,7 @@ def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : T2TwoRegShiftImm<oops, iops, itin, opc, asm, pattern> {
+ : T2TwoRegShiftImm<oops, iops, itin, opc, asm, pattern>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -3072,7 +3109,7 @@ def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
let Inst{3-0} = opt;
}
-def t2ISB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+def t2ISB : AInoP<(outs), (ins instsyncb_opt:$opt), ThumbFrm, NoItinerary,
"isb", "\t$opt",
[]>, Requires<[IsThumb, HasDB]> {
bits<4> opt;
@@ -3243,7 +3280,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isPredicable = 1 in
def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
"b", ".w\t$target",
- [(br bb:$target)]> {
+ [(br bb:$target)]>, Sched<[WriteBr]> {
let Inst{31-27} = 0b11110;
let Inst{15-14} = 0b10;
let Inst{12} = 1;
@@ -3261,17 +3298,20 @@ let isNotDuplicable = 1, isIndirectBranch = 1 in {
def t2BR_JT : t2PseudoInst<(outs),
(ins GPR:$target, GPR:$index, i32imm:$jt, i32imm:$id),
0, IIC_Br,
- [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>;
+ [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>,
+ Sched<[WriteBr]>;
// FIXME: Add a non-pc based case that can be predicated.
def t2TBB_JT : t2PseudoInst<(outs),
- (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>;
+ (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>,
+ Sched<[WriteBr]>;
def t2TBH_JT : t2PseudoInst<(outs),
- (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>;
+ (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>,
+ Sched<[WriteBr]>;
def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,
- "tbb", "\t$addr", []> {
+ "tbb", "\t$addr", []>, Sched<[WriteBrTbl]> {
bits<4> Rn;
bits<4> Rm;
let Inst{31-20} = 0b111010001101;
@@ -3284,7 +3324,7 @@ def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,
}
def t2TBH : T2I<(outs), (ins addrmode_tbh:$addr), IIC_Br,
- "tbh", "\t$addr", []> {
+ "tbh", "\t$addr", []>, Sched<[WriteBrTbl]> {
bits<4> Rn;
bits<4> Rm;
let Inst{31-20} = 0b111010001101;
@@ -3304,7 +3344,7 @@ def t2TBH : T2I<(outs), (ins addrmode_tbh:$addr), IIC_Br,
let isBranch = 1, isTerminator = 1 in
def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
"b", ".w\t$target",
- [/*(ARMbrcond bb:$target, imm:$cc)*/]> {
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]>, Sched<[WriteBr]> {
let Inst{31-27} = 0b11110;
let Inst{15-14} = 0b10;
let Inst{12} = 0;
@@ -3331,7 +3371,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
(ins uncondbrtarget:$dst, pred:$p),
4, IIC_Br, [],
(t2B uncondbrtarget:$dst, pred:$p)>,
- Requires<[IsThumb2, IsIOS]>;
+ Requires<[IsThumb2, IsIOS]>, Sched<[WriteBr]>;
}
// IT block
@@ -3353,7 +3393,8 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
// Branch and Exchange Jazelle -- for disassembly only
// Rm = Inst{19-16}
-def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []> {
+def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []>,
+ Sched<[WriteBr]> {
bits<4> func;
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -3367,7 +3408,7 @@ let isBranch = 1, isTerminator = 1 in {
def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
"cbz\t$Rn, $target", []>,
T1Misc<{0,0,?,1,?,?,?}>,
- Requires<[IsThumb2]> {
+ Requires<[IsThumb2]>, Sched<[WriteBr]> {
// A8.6.27
bits<6> target;
bits<3> Rn;
@@ -3379,7 +3420,7 @@ let isBranch = 1, isTerminator = 1 in {
def tCBNZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
"cbnz\t$Rn, $target", []>,
T1Misc<{1,0,?,1,?,?,?}>,
- Requires<[IsThumb2]> {
+ Requires<[IsThumb2]>, Sched<[WriteBr]> {
// A8.6.27
bits<6> target;
bits<3> Rn;
@@ -3981,7 +4022,7 @@ def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm",
// Aliases for ADD without the ".w" optional width specifier.
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
- (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+ (t2ADDri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
(t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm",
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index b5a896c699..597b74a0c7 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -224,7 +224,36 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
(VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>;
-// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
+// FLDMX, FSTMX - Load and store multiple unknown precision registers for
+// pre-armv6 cores.
+// These instructions are deprecated, so we don't want them to be selected.
+multiclass vfp_ldstx_mult<string asm, bit L_bit> {
+ // Unknown precision
+ def XIA :
+      AXXI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+            IndexModeNone, !strconcat(asm, "iax${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def XIA_UPD :
+      AXXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+            IndexModeUpd, !strconcat(asm, "iax${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def XDB_UPD :
+      AXXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+            IndexModeUpd, !strconcat(asm, "dbx${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1;
+ let Inst{20} = L_bit;
+ }
+}
+
+defm FLDM : vfp_ldstx_mult<"fldm", 1>;
+defm FSTM : vfp_ldstx_mult<"fstm", 0>;
//===----------------------------------------------------------------------===//
// FP Binary Operations.
@@ -841,7 +870,8 @@ let Constraints = "$a = $dst" in {
class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
bit op5, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>,
+ Sched<[WriteCvtFP]> {
bits<5> dst;
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
let Inst{22} = dst{0};
@@ -852,7 +882,8 @@ class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
bit op5, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>,
+ Sched<[WriteCvtFP]> {
bits<5> dst;
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
let Inst{22} = dst{4};
@@ -1300,6 +1331,10 @@ let Uses = [FPSCR] in {
"vmrs", "\t$Rt, mvfr0", []>;
def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins),
"vmrs", "\t$Rt, mvfr1", []>;
+ def VMRS_FPINST : MovFromVFP<0b1001 /* fpinst */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpinst", []>;
+ def VMRS_FPINST2 : MovFromVFP<0b1010 /* fpinst2 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpinst2", []>;
}
//===----------------------------------------------------------------------===//
@@ -1333,6 +1368,11 @@ let Defs = [FPSCR] in {
// System level GPR -> FPSID
def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPR:$src),
"vmsr", "\tfpsid, $src", []>;
+
+ def VMSR_FPINST : MovToVFP<0b1001 /* fpinst */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpinst, $src", []>;
+ def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpinst2, $src", []>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index f4248fcfcc..d9ec4fd221 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -36,6 +36,13 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// 'isThumb'.
bool hasThumb2;
+  /// StByValParamsPadding - For a parameter that is split between
+  /// GPRs and memory: when recovering the GPR part, if StackAlignment == 8
+  /// and the size of the GPR part is not a multiple of 8, we need to insert
+  /// a gap before the parameter's start address so that the GPR part can be
+  /// "attached" to the part that was passed via the stack.
+  unsigned StByValParamsPadding;
+
/// VarArgsRegSaveSize - Size of the register save area for vararg functions.
///
unsigned ArgRegsSaveSize;
@@ -129,6 +136,7 @@ public:
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
+ StByValParamsPadding(0),
ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
@@ -141,7 +149,14 @@ public:
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
bool isThumb2Function() const { return isThumb && hasThumb2; }
- unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; }
+ unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; }
+ void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; }
+
+ unsigned getArgRegsSaveSize(unsigned Align = 0) const {
+ if (!Align)
+ return ArgRegsSaveSize;
+ return (ArgRegsSaveSize + Align - 1) & ~(Align - 1);
+ }
void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; }
bool hasStackFrame() const { return HasStackFrame; }
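Both accessors above are plain arithmetic. A minimal standalone sketch of the
same rules (hypothetical helper names, not the in-tree code; it assumes Align
is a power of two and that the byval padding is simply the distance from the
GPR-part size to the next stack-alignment boundary):

#include <cassert>

// Mirror of getArgRegsSaveSize(Align): round Size up to a multiple of Align.
static unsigned roundUpToAlign(unsigned Size, unsigned Align) {
  return (Size + Align - 1) & ~(Align - 1);
}

// Mirror of the StByValParamsPadding rule: gap inserted before a split byval
// parameter so its GPR part abuts the part that was passed on the stack.
static unsigned byValParamsPadding(unsigned GPRPartSize, unsigned StackAlign) {
  return roundUpToAlign(GPRPartSize, StackAlign) - GPRPartSize;
}

int main() {
  assert(roundUpToAlign(10, 8) == 16);   // 10-byte save area, 8-byte align
  assert(byValParamsPadding(12, 8) == 4); // 12 mod 8 != 0 -> 4-byte gap
  assert(byValParamsPadding(16, 8) == 0); // already aligned -> no gap
  return 0;
}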
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index 6f3819afd0..a7880364d8 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -18,7 +18,6 @@ using namespace llvm;
void ARMRegisterInfo::anchor() { }
-ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii,
- const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(tii, sti) {
+ARMRegisterInfo::ARMRegisterInfo(const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(sti) {
}
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index 8a248425c3..fb1537cf94 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -19,13 +19,13 @@
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
- class ARMSubtarget;
- class ARMBaseInstrInfo;
+
+class ARMSubtarget;
struct ARMRegisterInfo : public ARMBaseRegisterInfo {
virtual void anchor();
public:
- ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+ ARMRegisterInfo(const ARMSubtarget &STI);
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index b0f576bc2b..0459d645c4 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -27,31 +27,31 @@ class ARMFReg<bits<16> Enc, string n> : Register<n> {
// Subregister indices.
let Namespace = "ARM" in {
-def qqsub_0 : SubRegIndex;
-def qqsub_1 : SubRegIndex;
+def qqsub_0 : SubRegIndex<256>;
+def qqsub_1 : SubRegIndex<256, 256>;
// Note: Code depends on these having consecutive numbers.
-def qsub_0 : SubRegIndex;
-def qsub_1 : SubRegIndex;
-def qsub_2 : SubRegIndex<[qqsub_1, qsub_0]>;
-def qsub_3 : SubRegIndex<[qqsub_1, qsub_1]>;
-
-def dsub_0 : SubRegIndex;
-def dsub_1 : SubRegIndex;
-def dsub_2 : SubRegIndex<[qsub_1, dsub_0]>;
-def dsub_3 : SubRegIndex<[qsub_1, dsub_1]>;
-def dsub_4 : SubRegIndex<[qsub_2, dsub_0]>;
-def dsub_5 : SubRegIndex<[qsub_2, dsub_1]>;
-def dsub_6 : SubRegIndex<[qsub_3, dsub_0]>;
-def dsub_7 : SubRegIndex<[qsub_3, dsub_1]>;
-
-def ssub_0 : SubRegIndex;
-def ssub_1 : SubRegIndex;
-def ssub_2 : SubRegIndex<[dsub_1, ssub_0]>;
-def ssub_3 : SubRegIndex<[dsub_1, ssub_1]>;
-
-def gsub_0 : SubRegIndex;
-def gsub_1 : SubRegIndex;
+def qsub_0 : SubRegIndex<128>;
+def qsub_1 : SubRegIndex<128, 128>;
+def qsub_2 : ComposedSubRegIndex<qqsub_1, qsub_0>;
+def qsub_3 : ComposedSubRegIndex<qqsub_1, qsub_1>;
+
+def dsub_0 : SubRegIndex<64>;
+def dsub_1 : SubRegIndex<64, 64>;
+def dsub_2 : ComposedSubRegIndex<qsub_1, dsub_0>;
+def dsub_3 : ComposedSubRegIndex<qsub_1, dsub_1>;
+def dsub_4 : ComposedSubRegIndex<qsub_2, dsub_0>;
+def dsub_5 : ComposedSubRegIndex<qsub_2, dsub_1>;
+def dsub_6 : ComposedSubRegIndex<qsub_3, dsub_0>;
+def dsub_7 : ComposedSubRegIndex<qsub_3, dsub_1>;
+
+def ssub_0 : SubRegIndex<32>;
+def ssub_1 : SubRegIndex<32, 32>;
+def ssub_2 : ComposedSubRegIndex<dsub_1, ssub_0>;
+def ssub_3 : ComposedSubRegIndex<dsub_1, ssub_1>;
+
+def gsub_0 : SubRegIndex<32>;
+def gsub_1 : SubRegIndex<32, 32>;
// Let TableGen synthesize the remaining 12 ssub_* indices.
// We don't need to name them.
}
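A note on the new syntax: SubRegIndex<Size, Offset> now records the bit range a
sub-register covers, and ComposedSubRegIndex names the composition of two
indices. A hedged C++ sketch of how a consumer could check the compositions
defined above (assumes the ARM sub-register enum from the generated register
info is visible and that composeSubRegIndices is the generic composition hook;
not part of this patch):

#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>

// Sketch: the runtime composition hook should agree with the
// ComposedSubRegIndex definitions above.
void checkARMSubRegComposition(const llvm::TargetRegisterInfo *TRI) {
  using namespace llvm;
  // dsub_2 = qsub_1 composed with dsub_0 (low D register of the high Q).
  assert(TRI->composeSubRegIndices(ARM::qsub_1, ARM::dsub_0) == ARM::dsub_2);
  // ssub_2 = dsub_1 composed with ssub_0 (low S register of the high D).
  assert(TRI->composeSubRegIndices(ARM::dsub_1, ARM::ssub_0) == ARM::ssub_2);
}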
@@ -157,21 +157,26 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>;
// Current Program Status Register.
// We model fpscr with two registers: FPSCR models the control bits and will be
-// reserved. FPSCR_NZCV models the flag bits and will be unreserved.
-def CPSR : ARMReg<0, "cpsr">;
-def APSR : ARMReg<1, "apsr">;
-def SPSR : ARMReg<2, "spsr">;
-def FPSCR : ARMReg<3, "fpscr">;
-def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> {
+// reserved. FPSCR_NZCV models the flag bits and will be unreserved. APSR_NZCV
+// models the APSR when it's accessed by some special instructions. In such cases
+// it has the same encoding as PC.
+def CPSR : ARMReg<0, "cpsr">;
+def APSR : ARMReg<1, "apsr">;
+def APSR_NZCV : ARMReg<15, "apsr_nzcv">;
+def SPSR : ARMReg<2, "spsr">;
+def FPSCR : ARMReg<3, "fpscr">;
+def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> {
let Aliases = [FPSCR];
}
def ITSTATE : ARMReg<4, "itstate">;
// Special Registers - only available in privileged mode.
-def FPSID : ARMReg<0, "fpsid">;
-def MVFR1 : ARMReg<6, "mvfr1">;
-def MVFR0 : ARMReg<7, "mvfr0">;
-def FPEXC : ARMReg<8, "fpexc">;
+def FPSID : ARMReg<0, "fpsid">;
+def MVFR1 : ARMReg<6, "mvfr1">;
+def MVFR0 : ARMReg<7, "mvfr0">;
+def FPEXC : ARMReg<8, "fpexc">;
+def FPINST : ARMReg<9, "fpinst">;
+def FPINST2 : ARMReg<10, "fpinst2">;
// Register classes.
//
@@ -207,6 +212,16 @@ def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
}];
}
+// GPRs without the PC but with the APSR. Some instructions allow accessing
+// the APSR while actually encoding the PC in the register field. This is
+// useful for assembly and disassembly only.
+def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add GPR, APSR_NZCV)> {
+ let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
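+  // AltOrderSelect returns an index into AltOrders, with 0 meaning the
+  // default order above: ARM and Thumb2 functions get the LR-first order,
+  // Thumb1 functions the 8-register truncation.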
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
// GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the
// implied SP argument list.
// FIXME: It would be better to not use this at all and refactor the
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 2d088de96e..528c4ec737 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -69,6 +69,24 @@ def WriteCMP : SchedWrite;
def WriteCMPsi : SchedWrite;
def WriteCMPsr : SchedWrite;
+// Division.
+def WriteDiv : SchedWrite;
+
+// Loads.
+def WriteLd : SchedWrite;
+def WritePreLd : SchedWrite;
+
+// Branches.
+def WriteBr : SchedWrite;
+def WriteBrL : SchedWrite;
+def WriteBrTbl : SchedWrite;
+
+// Fixed-point conversions.
+def WriteCvtFP : SchedWrite;
+
+// Noop.
+def WriteNoop : SchedWrite;
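+
+// Each processor model must bind the writes above to concrete resources,
+// either via WriteRes or by SchedAlias-ing them to an existing write (see
+// the Cortex-A9 and Swift entries below).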
+
// Define TII for use in SchedVariant Predicates.
def : PredicateProlog<[{
const ARMBaseInstrInfo *TII =
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 9739ed20ce..d06ad7d669 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -2275,10 +2275,10 @@ def A9Read4 : SchedReadAdvance<3>;
// This table follows the ARM Cortex-A9 Technical Reference Manuals,
// mostly in order.
-def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
+def :ItinRW<[WriteALU], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
IIC_iMVNi,IIC_iMVNsi,
IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
-def :ItinRW<[A9WriteI,A9ReadALU],[IIC_iMVNr]>;
+def :ItinRW<[WriteALU, A9ReadALU],[IIC_iMVNr]>;
def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>;
@@ -2487,10 +2487,59 @@ def : SchedAlias<WriteALUsr, A9WriteALUsr>;
def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
def : SchedAlias<ReadALU, A9ReadALU>;
def : SchedAlias<ReadALUsr, A9ReadALU>;
-// FIXME: need to special case AND, ORR, EOR, BIC because they don't read
-// advance. But our instrinfo claims it does.
+def : InstRW< [WriteALU],
+ (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
+ "BICrr")>;
+def : InstRW< [WriteALUsi], (instregex "ANDrsi", "ORRrsi", "EORrsi", "BICrsi")>;
+def : InstRW< [WriteALUsr], (instregex "ANDrsr", "ORRrsr", "EORrsr", "BICrsr")>;
+
def : SchedAlias<WriteCMP, A9WriteALU>;
def : SchedAlias<WriteCMPsi, A9WriteALU>;
def : SchedAlias<WriteCMPsr, A9WriteALU>;
+
+def : InstRW< [A9WriteIsr], (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi",
+ "MOVCCsr")>;
+def : InstRW< [WriteALU, A9ReadALU], (instregex "MVNr")>;
+def : InstRW< [A9WriteI2], (instregex "MOVCCi32imm", "MOVi32imm",
+ "MOV_ga_dyn")>;
+def : InstRW< [A9WriteI2pc], (instregex "MOV_ga_pcrel")>;
+def : InstRW< [A9WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
+
+def : InstRW< [WriteALU], (instregex "SEL")>;
+
+def : InstRW< [WriteALUsi], (instregex "BFC", "BFI", "UBFX", "SBFX")>;
+
+def : InstRW< [A9WriteM],
+ (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5", "MLS",
+ "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>;
+def : InstRW< [A9WriteM, A9WriteMHi],
+ (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$", "UMLAL",
+ "UMAAL", "SMLALv5", "UMLALv5", "UMAALv5", "SMLALBB", "SMLALBT", "SMLALTB",
+ "SMLALTT")>;
+// FIXME: These instructions used to have NoItinerary. Just copied the one from above.
+def : InstRW< [A9WriteM, A9WriteMHi],
+ (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX",
+ "SMLSLD", "SMLLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>;
+
+def : InstRW<[A9WriteM16, A9WriteM16Hi],
+ (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>;
+def : InstRW<[A9WriteM16, A9WriteM16Hi],
+ (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLAWB", "SMLAWT")>;
+
+def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>;
+def : InstRW<[A9WriteLsi], (instregex "LDRrs")>;
+def : InstRW<[A9WriteLb],
+ (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH", "PICLDRSB",
+ "LDRH", "LDRSH", "LDRSB")>;
+def : InstRW<[A9WriteLbsi], (instregex "LDRrs")>;
+
+def : WriteRes<WriteDiv, []> { let Latency = 0; }
+
+def : WriteRes<WriteBr, [A9UnitB]>;
+def : WriteRes<WriteBrL, [A9UnitB]>;
+def : WriteRes<WriteBrTbl, [A9UnitB]>;
+def : WriteRes<WritePreLd, []>;
+def : SchedAlias<WriteCvtFP, A9WriteF>;
+def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
} // SchedModel = CortexA9Model
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
index 7c6df41070..b5cf2518c0 100644
--- a/lib/Target/ARM/ARMScheduleSwift.td
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -1096,9 +1096,27 @@ let SchedModel = SwiftModel in {
def SwiftUnitDiv : ProcResource<1>;
// Generic resource requirements.
+ def SwiftWriteP0OneCycle : SchedWriteRes<[SwiftUnitP0]>;
+ def SwiftWriteP0TwoCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 2; }
+ def SwiftWriteP0FourCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 4; }
+ def SwiftWriteP0SixCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 6; }
+ def SwiftWriteP0P1FourCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
+ let Latency = 4;
+ }
+ def SwiftWriteP0P1SixCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
+ let Latency = 6;
+ }
+ def SwiftWriteP01OneCycle : SchedWriteRes<[SwiftUnitP01]>;
+ def SwiftWriteP1TwoCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 2; }
+ def SwiftWriteP1FourCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 4; }
+ def SwiftWriteP1SixCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 6; }
+ def SwiftWriteP1EightCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 8; }
+ def SwiftWriteP1TwelveCyc : SchedWriteRes<[SwiftUnitP1]> { let Latency = 12; }
+ def SwiftWriteP01OneCycle2x : WriteSequence<[SwiftWriteP01OneCycle], 2>;
+ def SwiftWriteP01OneCycle3x : WriteSequence<[SwiftWriteP01OneCycle], 3>;
def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; }
- def SwiftWriteP01ThreeCycleTwoUops :
- SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]> {
+ def SwiftWriteP01ThreeCycleTwoUops : SchedWriteRes<[SwiftUnitP01,
+ SwiftUnitP01]> {
let Latency = 3;
let NumMicroOps = 2;
}
@@ -1107,7 +1125,23 @@ let SchedModel = SwiftModel in {
let NumMicroOps = 3;
let ResourceCycles = [3];
}
-
+ // Plain load without writeback.
+ def SwiftWriteP2ThreeCycle : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 3;
+ }
+ def SwiftWriteP2FourCycle : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 4;
+ }
+ // A store does not write to a register.
+ def SwiftWriteP2 : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 0;
+ }
+ foreach Num = 1-4 in {
+ def SwiftWrite#Num#xP2 : WriteSequence<[SwiftWriteP2], Num>;
+ }
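+  // The foreach expands to SwiftWrite1xP2 .. SwiftWrite4xP2, each a
+  // WriteSequence that repeats the zero-latency store write Num times.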
+ def SwiftWriteP01OneCycle2x_load : WriteSequence<[SwiftWriteP01OneCycle,
+ SwiftWriteP01OneCycle,
+ SwiftWriteP2ThreeCycle]>;
// 4.2.4 Arithmetic and Logical.
// ALU operation register shifted by immediate variant.
def SwiftWriteALUsi : SchedWriteVariant<[
@@ -1137,8 +1171,897 @@ let SchedModel = SwiftModel in {
def : ReadAdvance<ReadALU, 0>;
def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
+
+ def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[
+ SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01OneCycle]>,
+ SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]>
+ ]>;
+
// 4.2.5 Integer comparison
def : WriteRes<WriteCMP, [SwiftUnitP01]>;
- def : WriteRes<WriteCMPsi, [SwiftUnitP01]>;
- def : WriteRes<WriteCMPsr, [SwiftUnitP01]>;
+ def : SchedAlias<WriteCMPsi, SwiftChooseShiftKindP01OneOrTwoCycle>;
+ def : SchedAlias<WriteCMPsr, SwiftWriteP01TwoCycle>;
+
+ // 4.2.6 Shift, Move
+ // Shift
+ // ASR,LSL,ROR,RRX
+ // MOV(register-shiftedregister) MVN(register-shiftedregister)
+ // Move
+ // MOV,MVN
+ // MOVT
+ // Sign/Zero extension
+ def : InstRW<[SwiftWriteP01OneCycle],
+ (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
+ "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH",
+ "t2UXTB16")>;
+ // Pseudo instructions.
+ def : InstRW<[SwiftWriteP01OneCycle2x],
+ (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi32imm",
+ "t2MOVi32imm", "t2MOV_ga_dyn")>;
+ def : InstRW<[SwiftWriteP01OneCycle3x],
+ (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel", "t2MOVi16_ga_pcrel")>;
+ def : InstRW<[SwiftWriteP01OneCycle2x_load],
+ (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
+
+ def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;
+
+ def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>,
+ SchedVar<NoSchedPred, [ SwiftWriteP0OneCycle ]>
+ ]>;
+
+ // 4.2.7 Select
+ // SEL
+ def : InstRW<[SwiftPredP0OneOrTwoCycle], (instregex "SEL", "t2SEL")>;
+
+ // 4.2.8 Bitfield
+ // BFI,BFC, SBFX,UBFX
+ def : InstRW< [SwiftWriteP01TwoCycle],
+ (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
+ "(t|t2)UBFX", "(t|t2)SBFX")>;
+
+ // 4.2.9 Saturating arithmetic
+ def : InstRW< [SwiftWriteP01TwoCycle],
+ (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
+ "USAT16", "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
+ "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
+ "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
+ "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
+ "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX")>;
+
+ // 4.2.10 Parallel Arithmetic
+ // Not flag setting.
+ def : InstRW< [SwiftWriteALUsr],
+ (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
+ "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
+ "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
+ "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
+ // Flag setting.
+ def : InstRW< [SwiftWriteP01TwoCycle],
+ (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
+ "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
+ "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
+ "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
+ "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
+ "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
+
+ // 4.2.11 Sum of Absolute Difference
+ def : InstRW< [SwiftWriteP0P1FourCycle], (instregex "USAD8") >;
+ def : InstRW<[SwiftWriteP0P1FourCycle, ReadALU, ReadALU, SchedReadAdvance<2>],
+ (instregex "USADA8")>;
+
+ // 4.2.12 Integer Multiply (32-bit result)
+ // Two sources.
+ def : InstRW< [SwiftWriteP0FourCycle],
+ (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
+ "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
+ "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
+ "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
+
+ def SwiftWriteP0P01FiveCycleTwoUops :
+ SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
+ let Latency = 5;
+ }
+
+ def SwiftPredP0P01FourFiveCycle : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [ SwiftWriteP0P01FiveCycleTwoUops ]>,
+ SchedVar<NoSchedPred, [ SwiftWriteP0FourCycle ]>
+ ]>;
+
+ def SwiftReadAdvanceFourCyclesPred : SchedReadVariant<[
+ SchedVar<IsPredicatedPred, [SchedReadAdvance<4>]>,
+ SchedVar<NoSchedPred, [ReadALU]>
+ ]>;
+
+ // Multiply accumulate, three sources
+ def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
+ SwiftReadAdvanceFourCyclesPred],
+ (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
+ "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS",
+ "t2SMMLSR")>;
+
+ // 4.2.13 Integer Multiply (32-bit result, Q flag)
+ def : InstRW< [SwiftWriteP0FourCycle],
+ (instregex "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX")>;
+ def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
+ SwiftReadAdvanceFourCyclesPred],
+ (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
+ "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
+ "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT")>;
+ def : InstRW< [SwiftPredP0P01FourFiveCycle],
+ (instregex "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX")>;
+
+ def SwiftP0P0P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+ }
+ def SwiftWrite1Cycle : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+ }
+ def SwiftWrite5Cycle : SchedWriteRes<[]> {
+ let Latency = 5;
+ let NumMicroOps = 0;
+ }
+ def SwiftWrite6Cycle : SchedWriteRes<[]> {
+ let Latency = 6;
+ let NumMicroOps = 0;
+ }
+
+ // 4.2.14 Integer Multiply, Long
+ def : InstRW< [SwiftP0P0P01FiveCycle, SwiftWrite5Cycle],
+ (instregex "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$")>;
+
+ def Swift2P03P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [2, 3];
+ }
+
+ // 4.2.15 Integer Multiply Accumulate, Long
+ // 4.2.16 Integer Multiply Accumulate, Dual
+ // 4.2.17 Integer Multiply Accumulate Accumulate, Long
+ // We are being a bit inaccurate here.
+ def : InstRW< [SwiftWrite5Cycle, Swift2P03P01FiveCycle, ReadALU, ReadALU,
+ SchedReadAdvance<4>, SchedReadAdvance<3>],
+ (instregex "SMLALS", "UMLALS", "SMLAL", "UMLAL", "MLALBB", "SMLALBT",
+ "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
+ "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2MLALBB", "t2SMLALBT",
+ "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX", "t2SMLSLD", "t2SMLSLDX",
+ "t2UMAAL")>;
+
+ def SwiftDiv : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
+ let NumMicroOps = 1;
+ let Latency = 14;
+ let ResourceCycles = [1, 14];
+ }
+ // 4.2.18 Integer Divide
+ def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround.
+ def : InstRW <[SwiftDiv],
+ (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
+
+ // 4.2.19 Integer Load Single Element
+ // 4.2.20 Integer Load Signextended
+ def SwiftWriteP2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
+ let Latency = 3;
+ }
+ def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
+ let Latency = 4;
+ }
+ def SwiftWriteP2P01P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01,
+ SwiftUnitP01]> {
+ let Latency = 4;
+ }
+ def SwiftWriteP2P2ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2]> {
+ let Latency = 3;
+ }
+ def SwiftWriteP2P2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2,
+ SwiftUnitP01]> {
+ let Latency = 3;
+ }
+ def SwiftWrBackOne : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+ }
+ def SwiftWriteLdFour : SchedWriteRes<[]> {
+ let Latency = 4;
+ let NumMicroOps = 0;
+ }
+ // Not accurate.
+ def : InstRW<[SwiftWriteP2ThreeCycle],
+ (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
+ "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "tLDR[BH](r|i|spi|pci|pciASM)",
+ "tLDR(r|i|spi|pci|pciASM)")>;
+ def : InstRW<[SwiftWriteP2ThreeCycle],
+ (instregex "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>;
+ def : InstRW<[SwiftWriteP2P01FourCyle],
+ (instregex "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
+ "t2LDRpci_pic", "tLDRS(B|H)")>;
+ def : InstRW<[SwiftWriteP2P01ThreeCycle, SwiftWrBackOne],
+ (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
+ "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
+ "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T")>;
+ def : InstRW<[SwiftWriteP2P01P01FourCycle, SwiftWrBackOne],
+ (instregex "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
+ "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T")>;
+
+ // 4.2.21 Integer Dual Load
+ // Not accurate.
+ def : InstRW<[SwiftWriteP2P2ThreeCycle, SwiftWriteLdFour],
+ (instregex "t2LDRDi8", "LDRD$")>;
+ def : InstRW<[SwiftWriteP2P2P01ThreeCycle, SwiftWriteLdFour, SwiftWrBackOne],
+ (instregex "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
+
+ // 4.2.22 Integer Load, Multiple
+ // NumReg = 1 .. 16
+ foreach Lat = 3-25 in {
+ def SwiftWriteLM#Lat#Cy : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = Lat;
+ }
+ def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> { let Latency = Lat; }
+ }
+  // Predicates on the number of addresses a load/store multiple accesses.
+ foreach NumAddr = 1-16 in {
+ def SwiftLMAddr#NumAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
+ }
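The SwiftLMAddr*Pred predicates above dispatch on TII->getNumLDMAddresses(MI),
with TII supplied by the PredicateProlog in ARMSchedule.td. One plausible
shape for such a hook, shown purely as an assumption about what it computes,
is to sum the sizes of the instruction's memory operands in 4-byte words:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"

// Sketch, not the in-tree definition: number of 4-byte addresses a
// load/store-multiple touches, derived from its memory operands.
static unsigned numLDMAddresses(const llvm::MachineInstr *MI) {
  unsigned Size = 0;
  for (llvm::MachineInstr::mmo_iterator I = MI->memoperands_begin(),
                                        E = MI->memoperands_end();
       I != E; ++I)
    Size += (*I)->getSize();
  return Size / 4;
}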
+ def SwiftWriteLDMAddrNoWB : SchedWriteRes<[SwiftUnitP01]> { let Latency = 0; }
+ def SwiftWriteLDMAddrWB : SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]>;
+ def SwiftWriteLM : SchedWriteVariant<[
+ SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy]>,
+ SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy]>,
+ SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy]>,
+ SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy]>,
+ SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy]>,
+ SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy]>,
+ SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy]>,
+ SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy]>,
+ SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy]>,
+ SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy]>,
+ SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy]>,
+ SchedVar<SwiftLMAddr13Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM15Cy]>,
+ SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM15Cy, SwiftWriteLM16Cy]>,
+ SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM15Cy, SwiftWriteLM16Cy,
+ SwiftWriteLM17Cy]>,
+ SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM15Cy, SwiftWriteLM16Cy,
+ SwiftWriteLM17Cy, SwiftWriteLM18Cy]>,
+    // Unknown number of registers; just use the resources for two registers.
+ SchedVar<NoSchedPred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5CyNo, SwiftWriteLM6CyNo,
+ SwiftWriteLM7CyNo, SwiftWriteLM8CyNo,
+ SwiftWriteLM9CyNo, SwiftWriteLM10CyNo,
+ SwiftWriteLM11CyNo, SwiftWriteLM12CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM15CyNo, SwiftWriteLM16CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteLM18CyNo]>
+
+ ]> { let Variadic=1; }
+
+ def : InstRW<[SwiftWriteLM, SwiftWriteLDMAddrNoWB],
+ (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
+ "(t|sys)LDM(IA|DA|DB|IB)$")>;
+ def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM],
+ (instregex /*"t2LDMIA_RET", "tLDMIA_RET", "LDMIA_RET",*/
+ "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
+ def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM, SwiftWriteP1TwoCycle],
+ (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
+ // 4.2.23 Integer Store, Single Element
+ def : InstRW<[SwiftWriteP2],
+ (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX",
+ "t2STR(i12|i8|s)$", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
+
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2],
+ (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
+ "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
+ "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
+ "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
+
+ // 4.2.24 Integer Store, Dual
+ def : InstRW<[SwiftWriteP2, SwiftWriteP2, SwiftWriteP01OneCycle],
+ (instregex "STRD$", "t2STRDi8")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2, SwiftWriteP2,
+ SwiftWriteP01OneCycle],
+ (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
+
+ // 4.2.25 Integer Store, Multiple
+ def SwiftWriteStIncAddr : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
+ let Latency = 0;
+ }
+ foreach NumAddr = 1-16 in {
+ def SwiftWriteSTM#NumAddr : WriteSequence<[SwiftWriteStIncAddr], NumAddr>;
+ }
+ def SwiftWriteSTM : SchedWriteVariant<[
+ SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM2]>,
+ SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM3]>,
+ SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM4]>,
+ SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM5]>,
+ SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM6]>,
+ SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM7]>,
+ SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM8]>,
+ SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM9]>,
+ SchedVar<SwiftLMAddr10Pred,[SwiftWriteSTM10]>,
+ SchedVar<SwiftLMAddr11Pred,[SwiftWriteSTM11]>,
+ SchedVar<SwiftLMAddr12Pred,[SwiftWriteSTM12]>,
+ SchedVar<SwiftLMAddr13Pred,[SwiftWriteSTM13]>,
+ SchedVar<SwiftLMAddr14Pred,[SwiftWriteSTM14]>,
+ SchedVar<SwiftLMAddr15Pred,[SwiftWriteSTM15]>,
+ SchedVar<SwiftLMAddr16Pred,[SwiftWriteSTM16]>,
+    // Unknown number of registers; just use the resources for two registers.
+ SchedVar<NoSchedPred, [SwiftWriteSTM2]>
+ ]>;
+ def : InstRW<[SwiftWriteSTM],
+ (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteSTM],
+ (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
+ "PUSH", "tPUSH")>;
+
+ // 4.2.26 Branch
+ def : WriteRes<WriteBr, [SwiftUnitP1]> { let Latency = 0; }
+ def : WriteRes<WriteBrL, [SwiftUnitP1]> { let Latency = 2; }
+ def : WriteRes<WriteBrTbl, [SwiftUnitP1, SwiftUnitP2]> { let Latency = 0; }
+
+ // 4.2.27 Not issued
+ def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
+ def : InstRW<[WriteNoop], (instregex "t2IT", "IT", "NOP")>;
+
+ // 4.2.28 Advanced SIMD, Integer, 2 cycle
+ def : InstRW<[SwiftWriteP0TwoCycle],
+ (instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL",
+ "VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi",
+ "VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST",
+ "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL", "VQSHLU", "VBIF",
+ "VBIT", "VBSL", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;
+
+ def : InstRW<[SwiftWriteP1TwoCycle],
+ (instregex "VEXT", "VREV16", "VREV32", "VREV64")>;
+
+ // 4.2.29 Advanced SIMD, Integer, 4 cycle
+ // 4.2.30 Advanced SIMD, Integer with Accumulate
+ def : InstRW<[SwiftWriteP0FourCycle],
+ (instregex "VABA", "VABAL", "VPADAL", "VRSRA", "VSRA", "VACGE", "VACGT",
+ "VACLE", "VACLT", "VCEQ", "VCGE", "VCGT", "VCLE", "VCLT", "VRSHL",
+ "VQRSHL", "VRSHR(u|s)", "VABS(f|v)", "VQABS", "VQNEG", "VQADD",
+ "VQSUB")>;
+ def : InstRW<[SwiftWriteP1FourCycle],
+ (instregex "VRECPE", "VRSQRTE")>;
+
+ // 4.2.31 Advanced SIMD, Add and Shift with Narrow
+ def : InstRW<[SwiftWriteP0P1FourCycle],
+ (instregex "VADDHN", "VSUBHN", "VSHRN")>;
+ def : InstRW<[SwiftWriteP0P1SixCycle],
+ (instregex "VRADDHN", "VRSUBHN", "VRSHRN", "VQSHRN", "VQSHRUN",
+ "VQRSHRN", "VQRSHRUN")>;
+
+ // 4.2.32 Advanced SIMD, Vector Table Lookup
+ foreach Num = 1-4 in {
+ def SwiftWrite#Num#xP1TwoCycle : WriteSequence<[SwiftWriteP1TwoCycle], Num>;
+ }
+ def : InstRW<[SwiftWrite1xP1TwoCycle],
+ (instregex "VTB(L|X)1")>;
+ def : InstRW<[SwiftWrite2xP1TwoCycle],
+ (instregex "VTB(L|X)2")>;
+ def : InstRW<[SwiftWrite3xP1TwoCycle],
+ (instregex "VTB(L|X)3")>;
+ def : InstRW<[SwiftWrite4xP1TwoCycle],
+ (instregex "VTB(L|X)4")>;
+
+ // 4.2.33 Advanced SIMD, Transpose
+ def : InstRW<[SwiftWriteP1FourCycle, SwiftWriteP1FourCycle,
+ SwiftWriteP1TwoCycle/*RsrcOnly*/, SchedReadAdvance<2>],
+ (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
+
+ // 4.2.34 Advanced SIMD and VFP, Floating Point
+ def : InstRW<[SwiftWriteP0TwoCycle], (instregex "VABS(S|D)$", "VNEG(S|D)$")>;
+ def : InstRW<[SwiftWriteP0FourCycle],
+ (instregex "VCMP(D|S|ZD|ZS)$", "VCMPE(D|S|ZD|ZS)")>;
+ def : InstRW<[SwiftWriteP0FourCycle],
+ (instregex "VADD(S|f)", "VSUB(S|f)", "VABD", "VPADDf", "VMAX", "VMIN", "VPMAX",
+ "VPMIN")>;
+ def : InstRW<[SwiftWriteP0SixCycle], (instregex "VADDD$", "VSUBD$")>;
+ def : InstRW<[SwiftWriteP1EightCycle], (instregex "VRECPS", "VRSQRTS")>;
+
+ // 4.2.35 Advanced SIMD and VFP, Multiply
+ def : InstRW<[SwiftWriteP1FourCycle],
+ (instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH",
+ "VMULL", "VQDMULL")>;
+ def : InstRW<[SwiftWriteP1SixCycle],
+ (instregex "VMULD", "VNMULD")>;
+ def : InstRW<[SwiftWriteP1FourCycle],
+ (instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)",
+ "VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>;
+ def : InstRW<[SwiftWriteP1EightCycle], (instregex "VFMAfd", "VFMSfd")>;
+ def : InstRW<[SwiftWriteP1TwelveCyc], (instregex "VFMAfq", "VFMSfq")>;
+
+ // 4.2.36 Advanced SIMD and VFP, Convert
+ def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>;
+  // Fixed-point conversions.
+ def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; }
+
+ // 4.2.37 Advanced SIMD and VFP, Move
+ def : InstRW<[SwiftWriteP0TwoCycle],
+ (instregex "VMOVv", "VMOV(S|D)$", "VMOV(S|D)cc",
+ "VMVNv", "VMVN(d|q)", "VMVN(S|D)cc",
+ "FCONST(D|S)")>;
+ def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VMOVN", "VMOVL")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP0FourCycle, SwiftWriteP1TwoCycle]>],
+ (instregex "VQMOVN")>;
+ def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VDUPLN", "VDUPf")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>],
+ (instregex "VDUP(8|16|32)")>;
+ def : InstRW<[SwiftWriteP2ThreeCycle], (instregex "VMOVRS$")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP0TwoCycle]>],
+ (instregex "VMOVSR$", "VSETLN")>;
+ def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2FourCycle],
+ (instregex "VMOVRR(D|S)$")>;
+ def : InstRW<[SwiftWriteP2FourCycle], (instregex "VMOVDRR$")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>,
+ WriteSequence<[SwiftWrite1Cycle, SwiftWriteP2FourCycle,
+ SwiftWriteP1TwoCycle]>],
+ (instregex "VMOVSRR$")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle]>],
+ (instregex "VGETLN(u|i)")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle,
+ SwiftWriteP01OneCycle]>],
+ (instregex "VGETLNs")>;
+
+ // 4.2.38 Advanced SIMD and VFP, Move FPSCR
+ // Serializing instructions.
+ def SwiftWaitP0For15Cy : SchedWriteRes<[SwiftUnitP0]> {
+ let Latency = 15;
+ let ResourceCycles = [15];
+ }
+ def SwiftWaitP1For15Cy : SchedWriteRes<[SwiftUnitP1]> {
+ let Latency = 15;
+ let ResourceCycles = [15];
+ }
+ def SwiftWaitP2For15Cy : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 15;
+ let ResourceCycles = [15];
+ }
+ def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
+ (instregex "VMRS")>;
+ def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
+ (instregex "VMSR")>;
+ // Not serializing.
+ def : InstRW<[SwiftWriteP0TwoCycle], (instregex "FMSTAT")>;
+
+ // 4.2.39 Advanced SIMD and VFP, Load Single Element
+ def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDRD$", "VLDRS$")>;
+
+ // 4.2.40 Advanced SIMD and VFP, Store Single Element
+ def : InstRW<[SwiftWriteLM4Cy], (instregex "VSTRD$", "VSTRS$")>;
+
+ // 4.2.41 Advanced SIMD and VFP, Load Multiple
+ // 4.2.42 Advanced SIMD and VFP, Store Multiple
+
+ // Resource requirement for permuting, just reserves the resources.
+ foreach Num = 1-28 in {
+ def SwiftVLDMPerm#Num : SchedWriteRes<[SwiftUnitP1]> {
+ let Latency = 0;
+ let NumMicroOps = Num;
+ let ResourceCycles = [Num];
+ }
+ }
+
+ // Pre RA pseudos - load/store to a Q register as a D register pair.
+ def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDMQIA$", "VSTMQIA$")>;
+
+ // Post RA not modelled accurately. We assume that register use of width 64
+ // bit maps to a D register, 128 maps to a Q register. Not all different kinds
+ // are accurately represented.
+ def SwiftWriteVLDM : SchedWriteVariant<[
+ // Load of one S register.
+ SchedVar<SwiftLMAddr1Pred, [SwiftWriteLM4Cy]>,
+ // Load of one D register.
+ SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo]>,
+ // Load of 3 S registers.
+ SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm3]>,
+ // Load of a Q register (not necessarily true). We should not be mapping to
+ // 4 S registers, either.
+ SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo,
+ SwiftWriteLM4CyNo, SwiftWriteLM4CyNo]>,
+ // Load of 5 S registers.
+ SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm5]>,
+ // Load of 3 D registers. (Must also be able to handle an S register
+ // list, though that is not accurate.)
+ SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM10Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
+ // Load of 7 S registers.
+ SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm7]>,
+ // Load of two Q registers.
+ SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm2]>,
+ // Load of 9 S registers.
+ SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm9]>,
+ // Load of 5 D registers.
+ SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM10Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
+ // Inaccurate: reuse description from 9 S registers.
+ SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm9]>,
+ // Load of three Q registers.
+ SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM11Cy,
+ SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
+ SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
+ SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
+ SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm3]>,
+ // Inaccurate: reuse description from 9 S registers.
+ SchedVar<SwiftLMAddr13Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm9]>,
+ // Load of 7 D registers (inaccurate).
+ SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM10Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM14Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm7]>,
+ SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM17Cy, SwiftWriteLM18CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm9]>,
+ // Load of 4 Q registers.
+ SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM7Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM15Cy, SwiftWriteLM18CyNo,
+ SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm4]>,
+ // Unknown number of registers; just use the resources for two registers.
+ SchedVar<NoSchedPred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm2]>
+ ]> { let Variadic = 1; }
+
+ def : InstRW<[SwiftWriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
+
+ def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVLDM],
+ (instregex "VLDM[SD](IA|DB)_UPD$")>;
+
+ def SwiftWriteVSTM : SchedWriteVariant<[
+ // One S register.
+ SchedVar<SwiftLMAddr1Pred, [SwiftWriteSTM1]>,
+ // One D register.
+ SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM1]>,
+ // Three S registers.
+ SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM4]>,
+ // Assume one Q register.
+ SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM1]>,
+ SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM6]>,
+ // Assume three D registers.
+ SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM4]>,
+ SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM8]>,
+ // Assume two Q registers.
+ SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM3]>,
+ SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM10]>,
+ // Assume 5 D registers.
+ SchedVar<SwiftLMAddr10Pred, [SwiftWriteSTM6]>,
+ SchedVar<SwiftLMAddr11Pred, [SwiftWriteSTM12]>,
+ // Assume three Q registers.
+ SchedVar<SwiftLMAddr12Pred, [SwiftWriteSTM4]>,
+ SchedVar<SwiftLMAddr13Pred, [SwiftWriteSTM14]>,
+ // Assume 7 D registers.
+ SchedVar<SwiftLMAddr14Pred, [SwiftWriteSTM8]>,
+ SchedVar<SwiftLMAddr15Pred, [SwiftWriteSTM16]>,
+ // Assume four Q registers.
+ SchedVar<SwiftLMAddr16Pred, [SwiftWriteSTM5]>,
+ // Assume two Q registers.
+ SchedVar<NoSchedPred, [SwiftWriteSTM3]>
+ ]> { let Variadic = 1; }
+
+ def : InstRW<[SwiftWriteVSTM], (instregex "VSTM[SD](IA|DB)$")>;
+
+ def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVSTM],
+ (instregex "VSTM[SD](IA|DB)_UPD")>;
+
+ // 4.2.43 Advanced SIMD, Element or Structure Load and Store
+ def SwiftWrite2xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+ }
+ def SwiftWrite3xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 4;
+ let ResourceCycles = [3];
+ }
+ foreach Num = 1-2 in {
+ def SwiftExt#Num#xP0 : SchedWriteRes<[SwiftUnitP0]> {
+ let Latency = 0;
+ let NumMicroOps = Num;
+ let ResourceCycles = [Num];
+ }
+ }
+ // VLDx
+ // Multiple structures.
+ // Single element structure loads.
+ // We assume aligned.
+ // Single/two register.
+ def : InstRW<[SwiftWriteLM4Cy], (instregex "VLD1(d|q)(8|16|32|64)$")>;
+ def : InstRW<[SwiftWriteLM4Cy, SwiftWriteP01OneCycle],
+ (instregex "VLD1(d|q)(8|16|32|64)wb")>;
+ // Three register.
+ def : InstRW<[SwiftWrite3xP2FourCy],
+ (instregex "VLD1(d|q)(8|16|32|64)T$", "VLD1d64TPseudo")>;
+ def : InstRW<[SwiftWrite3xP2FourCy, SwiftWriteP01OneCycle],
+ (instregex "VLD1(d|q)(8|16|32|64)Twb")>;
+ // Four register.
+ def : InstRW<[SwiftWrite2xP2FourCy],
+ (instregex "VLD1(d|q)(8|16|32|64)Q$", "VLD1d64QPseudo")>;
+ def : InstRW<[SwiftWrite2xP2FourCy, SwiftWriteP01OneCycle],
+ (instregex "VLD1(d|q)(8|16|32|64)Qwb")>;
+ // Two element structure loads.
+ // Two/four register.
+ def : InstRW<[SwiftWriteLM9Cy, SwiftExt2xP0, SwiftVLDMPerm2],
+ (instregex "VLD2(d|q|b)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
+ SwiftVLDMPerm2],
+ (instregex "VLD2(d|q|b)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>;
+ // Three element structure.
+ def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
+ SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
+ (instregex "VLD3(d|q)(8|16|32)$")>;
+ def : InstRW<[SwiftWriteLM9Cy, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
+ (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
+
+ def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
+ (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
+ def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm3,
+ SwiftWrite3xP2FourCy],
+ (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+ // Four element structure loads.
+ def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
+ SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4,
+ SwiftWrite3xP2FourCy],
+ (instregex "VLD4(d|q)(8|16|32)$")>;
+ def : InstRW<[SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4,
+ SwiftWrite3xP2FourCy],
+ (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
+ def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
+ SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
+ SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
+ (instregex "VLD4(d|q)(8|16|32)_UPD")>;
+ def : InstRW<[SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
+ SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
+ (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+
+ // Single all/lane loads.
+ // One element structure.
+ def : InstRW<[SwiftWriteLM6Cy, SwiftVLDMPerm2],
+ (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm2],
+ (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)",
+ "VLD1LNq(8|16|32)Pseudo_UPD")>;
+ // Two element structure.
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftExt1xP0, SwiftVLDMPerm2],
+ (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
+ "VLD2LN(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftWriteP01OneCycle,
+ SwiftExt1xP0, SwiftVLDMPerm2],
+ (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
+ SwiftExt1xP0, SwiftVLDMPerm2],
+ (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb")>;
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
+ SwiftExt1xP0, SwiftVLDMPerm2],
+ (instregex "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;
+ // Three element structure.
+ def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy, SwiftExt1xP0,
+ SwiftVLDMPerm3],
+ (instregex "VLD3(DUP|LN)(d|q)(8|16|32)$",
+ "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy,
+ SwiftWriteP01OneCycle, SwiftExt1xP0, SwiftVLDMPerm3],
+ (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
+ def : InstRW<[SwiftWriteLM7Cy, SwiftWriteP01OneCycle, SwiftWriteLM8Cy,
+ SwiftWriteLM8Cy, SwiftExt1xP0, SwiftVLDMPerm3],
+ (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
+ // Four element structure.
+ def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
+ SwiftWriteLM10CyNo, SwiftExt1xP0, SwiftVLDMPerm5],
+ (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$",
+ "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
+ SwiftWriteLM10CyNo, SwiftWriteP01OneCycle, SwiftExt1xP0,
+ SwiftVLDMPerm5],
+ (instregex "VLD4(DUP|LN)(d|q)(8|16|32)_UPD")>;
+ def : InstRW<[SwiftWriteLM8Cy, SwiftWriteP01OneCycle, SwiftWriteLM9Cy,
+ SwiftWriteLM10CyNo, SwiftWriteLM10CyNo, SwiftExt1xP0,
+ SwiftVLDMPerm5],
+ (instregex "VLD4(DUP|LN)(d|q)(8|16|32)Pseudo_UPD")>;
+ // VSTx
+ // Multiple structures.
+ // Single element structure store.
+ def : InstRW<[SwiftWrite1xP2], (instregex "VST1d(8|16|32|64)$")>;
+ def : InstRW<[SwiftWrite2xP2], (instregex "VST1q(8|16|32|64)$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2],
+ (instregex "VST1d(8|16|32|64)wb")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2],
+ (instregex "VST1q(8|16|32|64)wb")>;
+ def : InstRW<[SwiftWrite3xP2],
+ (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite3xP2],
+ (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>;
+ def : InstRW<[SwiftWrite4xP2],
+ (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2],
+ (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
+ // Two element structure store.
+ def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
+ (instregex "VST2(d|b)(8|16|32)$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
+ (instregex "VST2(b|d)(8|16|32)wb")>;
+ def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
+ (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
+ (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;
+ // Three element structure store.
+ def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
+ (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
+ (instregex "VST3(d|q)(8|16|32)_UPD",
+ "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
+ // Four element structure store.
+ def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
+ (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm4],
+ (instregex "VST4(d|q)(8|16|32)_UPD",
+ "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
+ // Single/all lane store.
+ // One element structure.
+ def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
+ (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
+ (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>;
+ // Two element structure.
+ def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm2],
+ (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm2],
+ (instregex "VST2LN(d|q)(8|16|32)_UPD",
+ "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
+ // Three element structure.
+ def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
+ (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
+ (instregex "VST3LN(d|q)(8|16|32)_UPD",
+ "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
+ // Four element structure.
+ def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
+ (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2],
+ (instregex "VST4LN(d|q)(8|16|32)_UPD",
+ "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;
+
+ // 4.2.44 VFP, Divide and Square Root
+ def SwiftDiv17 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
+ let NumMicroOps = 1;
+ let Latency = 17;
+ let ResourceCycles = [1, 15];
+ }
+ def SwiftDiv32 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
+ let NumMicroOps = 1;
+ let Latency = 32;
+ let ResourceCycles = [1, 30];
+ }
+ def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>;
+ def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>;
+
+ // Not specified.
+ def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
+ // Preload.
+ def : WriteRes<WritePreLd, [SwiftUnitP2]> {
+ let Latency = 0;
+ let ResourceCycles = [0];
+ }
+
}
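The Swift model above leans heavily on zero-latency SchedWriteRes entries whose only effect is ResourceCycles: they reserve a unit without defining a value, which is how the SwiftVLDMPerm* writes throttle permute traffic and how SwiftWaitP{0,1,2}For15Cy serializes VMRS/VMSR. A minimal standalone C++ sketch of that issue rule, with hypothetical names and no claim to match the real MachineScheduler:

    #include <algorithm>

    // An op with ResourceCycles = [C] keeps its unit busy for C cycles; the
    // next op needing that unit cannot issue until the unit frees up, even
    // if the op that reserved it has zero result latency.
    unsigned issueCycle(unsigned OperandsReady, unsigned &UnitFreeCycle,
                        unsigned BusyCycles) {
      unsigned Issue = std::max(OperandsReady, UnitFreeCycle);
      UnitFreeCycle = Issue + BusyCycles; // reserve the unit
      return Issue;
    }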
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 41a7e0c2c8..93add6ee33 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -26,7 +26,7 @@ ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
}
SDValue
-ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
@@ -140,7 +140,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
// GNU library uses (ptr, value, size)
// See RTABI section 4.3.4
SDValue ARMSelectionDAGInfo::
-EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Chain, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVolatile,
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index 6419a73729..56c9375855 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -45,7 +45,7 @@ public:
~ARMSelectionDAGInfo();
virtual
- SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
@@ -55,7 +55,7 @@ public:
// Adjust parameters for memset, see RTABI section 4.3.4
virtual
- SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
SDValue Op1, SDValue Op2,
SDValue Op3, unsigned Align,
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 3b8e56fda4..4d204ceafc 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -38,9 +38,24 @@ static cl::opt<bool>
UseFusedMulOps("arm-use-mulops",
cl::init(true), cl::Hidden);
-static cl::opt<bool>
-StrictAlign("arm-strict-align", cl::Hidden,
- cl::desc("Disallow all unaligned memory accesses"));
+enum AlignMode {
+ DefaultAlign,
+ StrictAlign,
+ NoStrictAlign
+};
+
+static cl::opt<AlignMode>
+Align(cl::desc("Load/store alignment support"),
+ cl::Hidden, cl::init(DefaultAlign),
+ cl::values(
+ clEnumValN(DefaultAlign, "arm-default-align",
+ "Generate unaligned accesses only on hardware/OS "
+ "combinations that are known to support them"),
+ clEnumValN(StrictAlign, "arm-strict-align",
+ "Disallow all unaligned memory accesses"),
+ clEnumValN(NoStrictAlign, "arm-no-strict-align",
+ "Allow unaligned memory accesses"),
+ clEnumValEnd));
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, const TargetOptions &Options)
@@ -91,6 +106,7 @@ void ARMSubtarget::initializeEnvironment() {
HasRAS = false;
HasMPExtension = false;
FPOnlySP = false;
+ HasPerfMon = false;
HasTrustZone = false;
AllowsUnalignedMem = false;
Thumb2DSP = false;
@@ -162,10 +178,32 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
if (!isThumb() || hasThumb2())
PostRAScheduler = true;
- // v6+ may or may not support unaligned mem access depending on the system
- // configuration.
- if (!StrictAlign && hasV6Ops() && isTargetDarwin())
- AllowsUnalignedMem = true;
+ switch (Align) {
+ case DefaultAlign:
+ // Assume pre-ARMv6 doesn't support unaligned accesses.
+ //
+ // ARMv6 may or may not support unaligned accesses depending on the
+ // SCTLR.U bit, which is architecture-specific. We assume ARMv6
+ // Darwin targets support unaligned accesses, and others don't.
+ //
+ // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit
+ // which raises an alignment fault on unaligned accesses. Linux
+ // defaults this bit to 0 and handles it as a system-wide (not
+ // per-process) setting. It is therefore safe to assume that ARMv7+
+ // Linux targets support unaligned accesses. The same goes for NaCl.
+ //
+ // The above behavior is consistent with GCC.
+ AllowsUnalignedMem = (
+ (hasV7Ops() && (isTargetLinux() || isTargetNaCl())) ||
+ (hasV6Ops() && isTargetDarwin()));
+ break;
+ case StrictAlign:
+ AllowsUnalignedMem = false;
+ break;
+ case NoStrictAlign:
+ AllowsUnalignedMem = true;
+ break;
+ }
// NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
uint64_t Bits = getFeatureBits();
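Restated as a pure predicate, the DefaultAlign policy above is easier to audit. This is an illustrative sketch with hypothetical names, not LLVM API (note that hasV7Ops() implies hasV6Ops(), so v7 Darwin also qualifies):

    static bool allowsUnalignedMemDefault(bool HasV6, bool HasV7,
                                          bool IsDarwin, bool IsLinux,
                                          bool IsNaCl) {
      // Enable unaligned accesses only where the OS is known to configure
      // SCTLR appropriately.
      return (HasV7 && (IsLinux || IsNaCl)) || (HasV6 && IsDarwin);
    }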
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 038eb76ae1..bc5af96c60 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -148,6 +148,11 @@ protected:
/// precision.
bool FPOnlySP;
+ /// If true, the processor supports the Performance Monitor Extensions. These
+ /// include a generic cycle-counter as well as more fine-grained (often
+ /// implementation-specific) events.
+ bool HasPerfMon;
+
/// HasTrustZone - if true, processor supports TrustZone security extensions
bool HasTrustZone;
@@ -254,6 +259,7 @@ public:
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
bool isFPOnlySP() const { return FPOnlySP; }
+ bool hasPerfMon() const { return HasPerfMon; }
bool hasTrustZone() const { return HasTrustZone; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
@@ -270,9 +276,8 @@ public:
bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
- bool isTargetNaCl() const {
- return TargetTriple.getOS() == Triple::NaCl;
- }
+ bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NaCl; }
+ bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
bool isTargetELF() const { return !isTargetDarwin(); }
bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 42c7d2c437..17c52c94a0 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -85,6 +85,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget) {
+ initAsmInfo();
if (!Subtarget.hasARMOps())
report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
"support ARM mode execution!");
@@ -117,6 +118,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
FrameLowering(Subtarget.hasThumb2()
? new ARMFrameLowering(Subtarget)
: (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
+ initAsmInfo();
}
namespace {
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 114cc9e5c0..c59ca64c11 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -49,6 +49,20 @@ class ARMAsmParser : public MCTargetAsmParser {
MCAsmParser &Parser;
const MCRegisterInfo *MRI;
+ // Unwind directives state
+ SMLoc FnStartLoc;
+ SMLoc CantUnwindLoc;
+ SMLoc PersonalityLoc;
+ SMLoc HandlerDataLoc;
+ int FPReg;
+ void resetUnwindDirectiveParserState() {
+ FnStartLoc = SMLoc();
+ CantUnwindLoc = SMLoc();
+ PersonalityLoc = SMLoc();
+ HandlerDataLoc = SMLoc();
+ FPReg = -1;
+ }
+
 // Map of register aliases created via the .req directive.
StringMap<unsigned> RegisterReqs;
@@ -76,7 +90,7 @@ class ARMAsmParser : public MCTargetAsmParser {
if (!inITBlock()) return;
// Move to the next instruction in the IT block, if there is one. If not,
// mark the block as done.
- unsigned TZ = CountTrailingZeros_32(ITState.Mask);
+ unsigned TZ = countTrailingZeros(ITState.Mask);
if (++ITState.CurPosition == 5 - TZ)
ITState.CurPosition = ~0U; // Done with the IT block after this.
}
@@ -86,11 +100,11 @@ class ARMAsmParser : public MCTargetAsmParser {
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
bool Warning(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ ArrayRef<SMRange> Ranges = None) {
return Parser.Warning(L, Msg, Ranges);
}
bool Error(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ ArrayRef<SMRange> Ranges = None) {
return Parser.Error(L, Msg, Ranges);
}
@@ -113,6 +127,14 @@ class ARMAsmParser : public MCTargetAsmParser {
bool parseDirectiveUnreq(SMLoc L);
bool parseDirectiveArch(SMLoc L);
bool parseDirectiveEabiAttr(SMLoc L);
+ bool parseDirectiveFnStart(SMLoc L);
+ bool parseDirectiveFnEnd(SMLoc L);
+ bool parseDirectiveCantUnwind(SMLoc L);
+ bool parseDirectivePersonality(SMLoc L);
+ bool parseDirectiveHandlerData(SMLoc L);
+ bool parseDirectiveSetFP(SMLoc L);
+ bool parseDirectivePad(SMLoc L);
+ bool parseDirectiveRegSave(SMLoc L, bool IsVector);
StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
bool &CarrySetting, unsigned &ProcessorIMod,
@@ -130,12 +152,19 @@ class ARMAsmParser : public MCTargetAsmParser {
bool isThumbTwo() const {
return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2);
}
+ bool hasThumb() const {
+ return STI.getFeatureBits() & ARM::HasV4TOps;
+ }
bool hasV6Ops() const {
return STI.getFeatureBits() & ARM::HasV6Ops;
}
bool hasV7Ops() const {
return STI.getFeatureBits() & ARM::HasV7Ops;
}
+ bool hasARM() const {
+ return !(STI.getFeatureBits() & ARM::FeatureNoARM);
+ }
+
void SwitchMode() {
unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
setAvailableFeatures(FB);
@@ -161,6 +190,8 @@ class ARMAsmParser : public MCTargetAsmParser {
SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseMemBarrierOptOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseInstSyncBarrierOptOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseProcIFlagsOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseMSRMaskOperand(
@@ -242,7 +273,7 @@ public:
};
ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser), FPReg(-1) {
MCAsmParserExtension::Initialize(_Parser);
// Cache the MCRegisterInfo.
@@ -293,6 +324,7 @@ class ARMOperand : public MCParsedAsmOperand {
k_CoprocOption,
k_Immediate,
k_MemBarrierOpt,
+ k_InstSyncBarrierOpt,
k_Memory,
k_PostIndexRegister,
k_MSRMask,
@@ -336,6 +368,10 @@ class ARMOperand : public MCParsedAsmOperand {
ARM_MB::MemBOpt Val;
};
+ struct ISBOptOp {
+ ARM_ISB::InstSyncBOpt Val;
+ };
+
struct IFlagsOp {
ARM_PROC::IFlags Val;
};
@@ -422,6 +458,7 @@ class ARMOperand : public MCParsedAsmOperand {
struct CopOp Cop;
struct CoprocOptionOp CoprocOption;
struct MBOptOp MBOpt;
+ struct ISBOptOp ISBOpt;
struct ITMaskOp ITMask;
struct IFlagsOp IFlags;
struct MMaskOp MMask;
@@ -482,6 +519,8 @@ public:
case k_MemBarrierOpt:
MBOpt = o.MBOpt;
break;
+ case k_InstSyncBarrierOpt:
+ ISBOpt = o.ISBOpt;
+ break;
case k_Memory:
Memory = o.Memory;
break;
@@ -564,6 +603,11 @@ public:
return MBOpt.Val;
}
+ ARM_ISB::InstSyncBOpt getInstSyncBarrierOpt() const {
+ assert(Kind == k_InstSyncBarrierOpt && "Invalid access!");
+ return ISBOpt.Val;
+ }
+
ARM_PROC::IFlags getProcIFlags() const {
assert(Kind == k_ProcIFlags && "Invalid access!");
return IFlags.Val;
@@ -903,6 +947,7 @@ public:
bool isSPRRegList() const { return Kind == k_SPRRegisterList; }
bool isToken() const { return Kind == k_Token; }
bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; }
+ bool isInstSyncBarrierOpt() const { return Kind == k_InstSyncBarrierOpt; }
bool isMem() const { return Kind == k_Memory; }
bool isShifterImm() const { return Kind == k_ShifterImmediate; }
bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; }
@@ -949,7 +994,7 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Val = CE->getValue();
- return Val > -4096 && Val < 4096;
+ return (Val == INT32_MIN) || (Val > -4096 && Val < 4096);
}
bool isAddrMode3() const {
// If we have an immediate that's not a constant, treat it as a label
@@ -1680,6 +1725,11 @@ public:
Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
}
+ void addInstSyncBarrierOptOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getInstSyncBarrierOpt())));
+ }
+
void addMemNoOffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
@@ -2345,6 +2395,15 @@ public:
return Op;
}
+ static ARMOperand *CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt,
+ SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_InstSyncBarrierOpt);
+ Op->ISBOpt.Val = Opt;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) {
ARMOperand *Op = new ARMOperand(k_ProcIFlags);
Op->IFlags.Val = IFlags;
@@ -2399,6 +2458,9 @@ void ARMOperand::print(raw_ostream &OS) const {
case k_MemBarrierOpt:
OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">";
break;
+ case k_InstSyncBarrierOpt:
+ OS << "<ARM_ISB::" << InstSyncBOptToString(getInstSyncBarrierOpt()) << ">";
+ break;
case k_Memory:
OS << "<memory "
<< " base:" << Memory.BaseRegNum;
@@ -3036,7 +3098,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
// There's an optional '#' token here. Normally there wouldn't be, but
// inline assemble puts one in, and it's friendly to accept that.
if (Parser.getTok().is(AsmToken::Hash))
- Parser.Lex(); // Eat the '#'
+ Parser.Lex(); // Eat '#' or '$'.
const MCExpr *LaneIndex;
SMLoc Loc = Parser.getTok().getLoc();
@@ -3354,7 +3416,7 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Tok.is(AsmToken::Dollar) ||
Tok.is(AsmToken::Integer)) {
if (Parser.getTok().isNot(AsmToken::Integer))
- Parser.Lex(); // Eat the '#'.
+ Parser.Lex(); // Eat '#' or '$'.
SMLoc Loc = Parser.getTok().getLoc();
const MCExpr *MemBarrierID;
@@ -3383,6 +3445,57 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
+/// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseInstSyncBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ unsigned Opt;
+
+ if (Tok.is(AsmToken::Identifier)) {
+ StringRef OptStr = Tok.getString();
+
+ if (OptStr.lower() == "sy")
+ Opt = ARM_ISB::SY;
+ else
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ } else if (Tok.is(AsmToken::Hash) ||
+ Tok.is(AsmToken::Dollar) ||
+ Tok.is(AsmToken::Integer)) {
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ Parser.Lex(); // Eat '#' or '$'.
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ const MCExpr *ISBarrierID;
+ if (getParser().parseExpression(ISBarrierID)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ISBarrierID);
+ if (!CE) {
+ Error(Loc, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+
+ int Val = CE->getValue();
+ if (Val & ~0xf) {
+ Error(Loc, "immediate value out of range");
+ return MatchOperand_ParseFail;
+ }
+
+ Opt = ARM_ISB::RESERVED_0 + Val;
+ } else
+ return MatchOperand_ParseFail;
+
+ Operands.push_back(ARMOperand::CreateInstSyncBarrierOpt(
+ (ARM_ISB::InstSyncBOpt)Opt, S));
+ return MatchOperand_Success;
+}
+
/// parseProcIFlagsOperand - Try to parse iflags from CPS instruction.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
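For reference, forms the new ISB operand parser accepts, shown as hypothetical test inputs: "sy" is matched case-insensitively via OptStr.lower(), and immediates must fit in four bits, with 15 being the SY encoding.

    const char *ISBExamples[] = {
      "isb sy",  // named option, any case
      "isb #15", // immediate form of SY
      "isb #1",  // reserved option values still parse
    };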
@@ -3602,7 +3715,7 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Error(S, "'be' or 'le' operand expected");
return MatchOperand_ParseFail;
}
- int Val = StringSwitch<int>(Tok.getString())
+ int Val = StringSwitch<int>(Tok.getString().lower())
.Case("be", 1)
.Case("le", 0)
.Default(-1);
@@ -3875,7 +3988,7 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Do immediates first, as we always parse those if we have a '#'.
if (Parser.getTok().is(AsmToken::Hash) ||
Parser.getTok().is(AsmToken::Dollar)) {
- Parser.Lex(); // Eat the '#'.
+ Parser.Lex(); // Eat '#' or '$'.
// Explicitly look for a '-', as we need to encode negative zero
// differently.
bool isNegative = Parser.getTok().is(AsmToken::Minus);
@@ -4354,7 +4467,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.getTok().is(AsmToken::Dollar) ||
Parser.getTok().is(AsmToken::Integer)) {
if (Parser.getTok().isNot(AsmToken::Integer))
- Parser.Lex(); // Eat the '#'.
+ Parser.Lex(); // Eat '#' or '$'.
E = Parser.getTok().getLoc();
bool isNegative = getParser().getTok().is(AsmToken::Minus);
@@ -4536,7 +4649,7 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
TyOp->getToken() != ".f64"))
return MatchOperand_NoMatch;
- Parser.Lex(); // Eat the '#'.
+ Parser.Lex(); // Eat '#' or '$'.
// Handle negation, as that still comes through as a separate token.
bool isNegative = false;
@@ -7398,11 +7511,10 @@ processInstruction(MCInst &Inst,
MCOperand &MO = Inst.getOperand(1);
unsigned Mask = MO.getImm();
unsigned OrigMask = Mask;
- unsigned TZ = CountTrailingZeros_32(Mask);
+ unsigned TZ = countTrailingZeros(Mask);
if ((Inst.getOperand(0).getImm() & 1) == 0) {
assert(Mask && TZ <= 3 && "illegal IT mask value!");
- for (unsigned i = 3; i != TZ; --i)
- Mask ^= 1 << i;
+ Mask ^= (0xE << TZ) & 0xF;
}
MO.setImm(Mask);
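The closed-form update above replaces a loop that flipped bits 3 down to TZ+1 of the IT mask. A quick standalone check (not LLVM code) that the two agree for every legal trailing-zero count:

    #include <cassert>

    int main() {
      for (unsigned TZ = 0; TZ <= 3; ++TZ) {
        unsigned Loop = 0;
        for (unsigned i = 3; i != TZ; --i)
          Loop ^= 1u << i;                     // old per-bit flips
        unsigned Closed = (0xEu << TZ) & 0xFu; // new expression
        assert(Loop == Closed);
      }
      return 0;
    }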
@@ -7658,6 +7770,24 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveArch(DirectiveID.getLoc());
else if (IDVal == ".eabi_attribute")
return parseDirectiveEabiAttr(DirectiveID.getLoc());
+ else if (IDVal == ".fnstart")
+ return parseDirectiveFnStart(DirectiveID.getLoc());
+ else if (IDVal == ".fnend")
+ return parseDirectiveFnEnd(DirectiveID.getLoc());
+ else if (IDVal == ".cantunwind")
+ return parseDirectiveCantUnwind(DirectiveID.getLoc());
+ else if (IDVal == ".personality")
+ return parseDirectivePersonality(DirectiveID.getLoc());
+ else if (IDVal == ".handlerdata")
+ return parseDirectiveHandlerData(DirectiveID.getLoc());
+ else if (IDVal == ".setfp")
+ return parseDirectiveSetFP(DirectiveID.getLoc());
+ else if (IDVal == ".pad")
+ return parseDirectivePad(DirectiveID.getLoc());
+ else if (IDVal == ".save")
+ return parseDirectiveRegSave(DirectiveID.getLoc(), false);
+ else if (IDVal == ".vsave")
+ return parseDirectiveRegSave(DirectiveID.getLoc(), true);
return true;
}
@@ -7693,6 +7823,9 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
return Error(L, "unexpected token in directive");
Parser.Lex();
+ if (!hasThumb())
+ return Error(L, "target does not support Thumb mode");
+
if (!isThumb())
SwitchMode();
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
@@ -7706,6 +7839,9 @@ bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
return Error(L, "unexpected token in directive");
Parser.Lex();
+ if (!hasARM())
+ return Error(L, "target does not support ARM mode");
+
if (isThumb())
SwitchMode();
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
@@ -7795,10 +7931,16 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
Parser.Lex();
if (Val == 16) {
+ if (!hasThumb())
+ return Error(L, "target does not support Thumb mode");
+
if (!isThumb())
SwitchMode();
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
} else {
+ if (!hasARM())
+ return Error(L, "target does not support ARM mode");
+
if (isThumb())
SwitchMode();
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
@@ -7858,6 +8000,219 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
return true;
}
+/// parseDirectiveFnStart
+/// ::= .fnstart
+bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
+ if (FnStartLoc.isValid()) {
+ Error(L, ".fnstart starts before the end of previous one");
+ Error(FnStartLoc, "previous .fnstart starts here");
+ return true;
+ }
+
+ FnStartLoc = L;
+ getParser().getStreamer().EmitFnStart();
+ return false;
+}
+
+/// parseDirectiveFnEnd
+/// ::= .fnend
+bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) {
+ // Check the ordering of unwind directives
+ if (!FnStartLoc.isValid())
+ return Error(L, ".fnstart must precede .fnend directive");
+
+ // Reset the unwind directives parser state
+ resetUnwindDirectiveParserState();
+
+ getParser().getStreamer().EmitFnEnd();
+ return false;
+}
+
+/// parseDirectiveCantUnwind
+/// ::= .cantunwind
+bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) {
+ // Check the ordering of unwind directives
+ CantUnwindLoc = L;
+ if (!FnStartLoc.isValid())
+ return Error(L, ".fnstart must precede .cantunwind directive");
+ if (HandlerDataLoc.isValid()) {
+ Error(L, ".cantunwind can't be used with .handlerdata directive");
+ Error(HandlerDataLoc, ".handlerdata was specified here");
+ return true;
+ }
+ if (PersonalityLoc.isValid()) {
+ Error(L, ".cantunwind can't be used with .personality directive");
+ Error(PersonalityLoc, ".personality was specified here");
+ return true;
+ }
+
+ getParser().getStreamer().EmitCantUnwind();
+ return false;
+}
+
+/// parseDirectivePersonality
+/// ::= .personality name
+bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
+ // Check the ordering of unwind directives
+ PersonalityLoc = L;
+ if (!FnStartLoc.isValid())
+ return Error(L, ".fnstart must precede .personality directive");
+ if (CantUnwindLoc.isValid()) {
+ Error(L, ".personality can't be used with .cantunwind directive");
+ Error(CantUnwindLoc, ".cantunwind was specified here");
+ return true;
+ }
+ if (HandlerDataLoc.isValid()) {
+ Error(L, ".personality must precede .handlerdata directive");
+ Error(HandlerDataLoc, ".handlerdata was specified here");
+ return true;
+ }
+
+ // Parse the name of the personality routine
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Parser.eatToEndOfStatement();
+ return Error(L, "unexpected input in .personality directive.");
+ }
+ StringRef Name(Parser.getTok().getIdentifier());
+ Parser.Lex();
+
+ MCSymbol *PR = getParser().getContext().GetOrCreateSymbol(Name);
+ getParser().getStreamer().EmitPersonality(PR);
+ return false;
+}
+
+/// parseDirectiveHandlerData
+/// ::= .handlerdata
+bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) {
+ // Check the ordering of unwind directives
+ HandlerDataLoc = L;
+ if (!FnStartLoc.isValid())
+ return Error(L, ".fnstart must precede .personality directive");
+ if (CantUnwindLoc.isValid()) {
+ Error(L, ".handlerdata can't be used with .cantunwind directive");
+ Error(CantUnwindLoc, ".cantunwind was specified here");
+ return true;
+ }
+
+ getParser().getStreamer().EmitHandlerData();
+ return false;
+}
+
+/// parseDirectiveSetFP
+/// ::= .setfp fpreg, spreg [, offset]
+bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) {
+ // Check the ordering of unwind directives
+ if (!FnStartLoc.isValid())
+ return Error(L, ".fnstart must precede .setfp directive");
+ if (HandlerDataLoc.isValid())
+ return Error(L, ".setfp must precede .handlerdata directive");
+
+ // Parse fpreg
+ SMLoc NewFPRegLoc = Parser.getTok().getLoc();
+ int NewFPReg = tryParseRegister();
+ if (NewFPReg == -1)
+ return Error(NewFPRegLoc, "frame pointer register expected");
+
+ // Consume comma
+ if (!Parser.getTok().is(AsmToken::Comma))
+ return Error(Parser.getTok().getLoc(), "comma expected");
+ Parser.Lex(); // skip comma
+
+ // Parse spreg
+ SMLoc NewSPRegLoc = Parser.getTok().getLoc();
+ int NewSPReg = tryParseRegister();
+ if (NewSPReg == -1)
+ return Error(NewSPRegLoc, "stack pointer register expected");
+
+ if (NewSPReg != ARM::SP && NewSPReg != FPReg)
+ return Error(NewSPRegLoc,
+ "register should be either $sp or the latest fp register");
+
+ // Update the frame pointer register
+ FPReg = NewFPReg;
+
+ // Parse offset
+ int64_t Offset = 0;
+ if (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex(); // skip comma
+
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
+ return Error(Parser.getTok().getLoc(), "'#' expected");
+ }
+ Parser.Lex(); // skip hash token.
+
+ const MCExpr *OffsetExpr;
+ SMLoc ExLoc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ if (getParser().parseExpression(OffsetExpr, EndLoc))
+ return Error(ExLoc, "malformed setfp offset");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr);
+ if (!CE)
+ return Error(ExLoc, "setfp offset must be an immediate");
+
+ Offset = CE->getValue();
+ }
+
+ getParser().getStreamer().EmitSetFP(static_cast<unsigned>(NewFPReg),
+ static_cast<unsigned>(NewSPReg),
+ Offset);
+ return false;
+}
+
+/// parseDirectivePad
+/// ::= .pad offset
+bool ARMAsmParser::parseDirectivePad(SMLoc L) {
+ // Check the ordering of unwind directives
+ if (!FnStartLoc.isValid())
+ return Error(L, ".fnstart must precede .pad directive");
+ if (HandlerDataLoc.isValid())
+ return Error(L, ".pad must precede .handlerdata directive");
+
+ // Parse the offset
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
+ return Error(Parser.getTok().getLoc(), "'#' expected");
+ }
+ Parser.Lex(); // skip hash token.
+
+ const MCExpr *OffsetExpr;
+ SMLoc ExLoc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ if (getParser().parseExpression(OffsetExpr, EndLoc))
+ return Error(ExLoc, "malformed pad offset");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr);
+ if (!CE)
+ return Error(ExLoc, "pad offset must be an immediate");
+
+ getParser().getStreamer().EmitPad(CE->getValue());
+ return false;
+}
+
+/// parseDirectiveRegSave
+/// ::= .save { registers }
+/// ::= .vsave { registers }
+bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
+ // Check the ordering of unwind directives
+ if (!FnStartLoc.isValid())
+ return Error(L, ".fnstart must precede .save or .vsave directives");
+ if (HandlerDataLoc.isValid())
+ return Error(L, ".save or .vsave must precede .handlerdata directive");
+
+ // Parse the register list
+ SmallVector<MCParsedAsmOperand*, 1> Operands;
+ if (parseRegisterList(Operands))
+ return true;
+ ARMOperand *Op = (ARMOperand*)Operands[0];
+ if (!IsVector && !Op->isRegList())
+ return Error(L, ".save expects GPR registers");
+ if (IsVector && !Op->isDPRRegList())
+ return Error(L, ".vsave expects DPR registers");
+
+ getParser().getStreamer().EmitRegSave(Op->getRegList(), IsVector);
+ return false;
+}
+
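Taken together, the parsers above enforce the EHABI directive ordering. A hypothetical input that satisfies every check (register lists and the personality symbol are illustrative), shown as a C++ test string:

    const char *UnwindExample =
        "\t.fnstart\n"
        "\t.save {r4, r5, lr}\n"    // GPR list -> .save
        "\t.vsave {d8, d9}\n"       // DPR list -> .vsave
        "\t.setfp fp, sp, #8\n"     // second reg must be sp or the current fp
        "\t.pad #16\n"
        "\t.personality __gxx_personality_v0\n"
        "\t.handlerdata\n"          // frame directives must all precede this
        "\t.fnend\n";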
/// Force static initialization.
extern "C" void LLVMInitializeARMAsmParser() {
RegisterMCAsmParser<ARMAsmParser> X(TheARMTarget);
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index ac937f3534..a6eab33af3 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -65,7 +65,7 @@ namespace {
void setITState(char Firstcond, char Mask) {
// (3 - the number of trailing zeros) is the number of then / else.
unsigned CondBit0 = Firstcond & 1;
- unsigned NumTZ = CountTrailingZeros_32(Mask);
+ unsigned NumTZ = countTrailingZeros<uint8_t>(Mask);
unsigned char CCBits = static_cast<unsigned char>(Firstcond & 0xf);
assert(NumTZ <= 3 && "Invalid IT mask!");
// push condition codes onto the stack the correct order for the pops
@@ -156,12 +156,17 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
@@ -236,6 +241,14 @@ static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val,
@@ -268,6 +281,8 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
@@ -348,6 +363,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
@@ -402,7 +419,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
"Asked to disassemble an ARM instruction but Subtarget is in Thumb mode!");
// We want to read exactly 4 bytes of data.
- if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ if (Region.readBytes(Address, 4, bytes) == -1) {
Size = 0;
return MCDisassembler::Fail;
}
@@ -492,102 +509,9 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
bool isBranch, uint64_t InstSize,
MCInst &MI, const void *Decoder) {
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
- LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
- struct LLVMOpInfo1 SymbolicOp;
- memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
- SymbolicOp.Value = Value;
- void *DisInfo = Dis->getDisInfoBlock();
-
- if (!getOpInfo ||
- !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
- // Clear SymbolicOp.Value from above and also all other fields.
- memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
- LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
- if (!SymbolLookUp)
- return false;
- uint64_t ReferenceType;
- if (isBranch)
- ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
- else
- ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
- const char *ReferenceName;
- uint64_t SymbolValue = 0x00000000ffffffffULL & Value;
- const char *Name = SymbolLookUp(DisInfo, SymbolValue, &ReferenceType,
- Address, &ReferenceName);
- if (Name) {
- SymbolicOp.AddSymbol.Name = Name;
- SymbolicOp.AddSymbol.Present = true;
- }
- // For branches always create an MCExpr so it gets printed as hex address.
- else if (isBranch) {
- SymbolicOp.Value = Value;
- }
- if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
- (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
- if (!Name && !isBranch)
- return false;
- }
-
- MCContext *Ctx = Dis->getMCContext();
- const MCExpr *Add = NULL;
- if (SymbolicOp.AddSymbol.Present) {
- if (SymbolicOp.AddSymbol.Name) {
- StringRef Name(SymbolicOp.AddSymbol.Name);
- MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
- Add = MCSymbolRefExpr::Create(Sym, *Ctx);
- } else {
- Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
- }
- }
-
- const MCExpr *Sub = NULL;
- if (SymbolicOp.SubtractSymbol.Present) {
- if (SymbolicOp.SubtractSymbol.Name) {
- StringRef Name(SymbolicOp.SubtractSymbol.Name);
- MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
- Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
- } else {
- Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
- }
- }
-
- const MCExpr *Off = NULL;
- if (SymbolicOp.Value != 0)
- Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
-
- const MCExpr *Expr;
- if (Sub) {
- const MCExpr *LHS;
- if (Add)
- LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
- else
- LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
- if (Off != 0)
- Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
- else
- Expr = LHS;
- } else if (Add) {
- if (Off != 0)
- Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
- else
- Expr = Add;
- } else {
- if (Off != 0)
- Expr = Off;
- else
- Expr = MCConstantExpr::Create(0, *Ctx);
- }
-
- if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16)
- MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx)));
- else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16)
- MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
- else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None)
- MI.addOperand(MCOperand::CreateExpr(Expr));
- else
- llvm_unreachable("bad SymbolicOp.VariantKind");
-
- return true;
+ // FIXME: Does it make sense for value to be negative?
+ return Dis->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address, isBranch,
+ /* Offset */ 0, InstSize);
}
 /// tryAddingPcLoadReferenceComment - tries to add a comment as to what is being
@@ -602,17 +526,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value,
const void *Decoder) {
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
- LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
- if (SymbolLookUp) {
- void *DisInfo = Dis->getDisInfoBlock();
- uint64_t ReferenceType;
- ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
- const char *ReferenceName;
- (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
- if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr ||
- ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
- (*Dis->CommentStream) << "literal pool for: " << ReferenceName;
- }
+ Dis->tryAddingPcLoadReferenceComment(Value, Address);
}
// Thumb1 instructions don't have explicit S bits. Rather, they
@@ -751,7 +665,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
"Asked to disassemble in Thumb mode but Subtarget is in ARM mode!");
// We want to read exactly 2 bytes of data.
- if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1) {
+ if (Region.readBytes(Address, 2, bytes) == -1) {
Size = 0;
return MCDisassembler::Fail;
}
@@ -803,7 +717,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
// We want to read exactly 4 bytes of data.
- if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ if (Region.readBytes(Address, 4, bytes) == -1) {
Size = 0;
return MCDisassembler::Fail;
}
@@ -920,6 +834,21 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
return S;
}
+static DecodeStatus
+DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo == 15) {
+ Inst.addOperand(MCOperand::CreateReg(ARM::APSR_NZCV));
+ return MCDisassembler::Success;
+ }
+
+ Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+ return S;
+}
+
static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 7)
@@ -927,6 +856,26 @@ static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
}
+static const uint16_t GPRPairDecoderTable[] = {
+ ARM::R0_R1, ARM::R2_R3, ARM::R4_R5, ARM::R6_R7,
+ ARM::R8_R9, ARM::R10_R11, ARM::R12_SP
+};
+
+static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo > 13)
+ return MCDisassembler::Fail;
+
+ if ((RegNo & 1) || RegNo == 0xe)
+ S = MCDisassembler::SoftFail;
+
+ unsigned RegisterPair = GPRPairDecoderTable[RegNo/2];
+ Inst.addOperand(MCOperand::CreateReg(RegisterPair));
+ return S;
+}
+
static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
unsigned Register = 0;
@@ -1030,7 +979,7 @@ static const uint16_t QPRDecoderTable[] = {
static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
- if (RegNo > 31)
+ if (RegNo > 31 || (RegNo & 1) != 0)
return MCDisassembler::Fail;
RegNo >>= 1;
@@ -1206,7 +1155,7 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
}
// Empty register lists are not allowed.
- if (CountPopulation_32(Val) == 0) return MCDisassembler::Fail;
+ if (Val == 0) return MCDisassembler::Fail;
for (unsigned i = 0; i < 16; ++i) {
if (Val & (1 << i)) {
if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder)))
@@ -1227,6 +1176,13 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
unsigned Vd = fieldFromInstruction(Val, 8, 5);
unsigned regs = fieldFromInstruction(Val, 0, 8);
+ // In case of unpredictable encoding, tweak the operands.
+ if (regs == 0 || (Vd + regs) > 32) {
+ regs = Vd + regs > 32 ? 32 - Vd : regs;
+ regs = std::max(1u, regs);
+ S = MCDisassembler::SoftFail;
+ }
+
if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder)))
return MCDisassembler::Fail;
for (unsigned i = 0; i < (regs - 1); ++i) {
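A quick standalone check (not LLVM code) of the clamping above: a register list that would run past S31 is truncated and soft-fails rather than being rejected outright:

    #include <algorithm>
    #include <cassert>

    int main() {
      unsigned Vd = 30, regs = 8; // S30 plus 8 registers overruns the bank
      if (regs == 0 || (Vd + regs) > 32) {
        regs = Vd + regs > 32 ? 32 - Vd : regs;
        regs = std::max(1u, regs);
      }
      assert(regs == 2); // clamped to {S30, S31}
      return 0;
    }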
@@ -1242,9 +1198,15 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
DecodeStatus S = MCDisassembler::Success;
unsigned Vd = fieldFromInstruction(Val, 8, 5);
- unsigned regs = fieldFromInstruction(Val, 0, 8);
+ unsigned regs = fieldFromInstruction(Val, 1, 7);
- regs = regs >> 1;
+ // In case of unpredictable encoding, tweak the operands.
+ if (regs == 0 || regs > 16 || (Vd + regs) > 32) {
+ regs = Vd + regs > 32 ? 32 - Vd : regs;
+ regs = std::max(1u, regs);
+ regs = std::min(16u, regs);
+ S = MCDisassembler::SoftFail;
+ }
if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
return MCDisassembler::Fail;
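Both register-list decoders now recover from unpredictable encodings by clamping the count and soft-failing instead of rejecting the instruction outright. A sketch of the DPR-list rule, mirroring the hunk above (Vd is the first register, regs the encoded count; the helper name is made up):

    #include <algorithm>

    // Clamp an unpredictable D-register list into [1, 16] registers without
    // running past D31; returns true when the encoding had to be fixed up,
    // i.e. when the decode is a SoftFail.
    bool clampDRegList(unsigned Vd, unsigned &regs) {
      if (regs != 0 && regs <= 16 && Vd + regs <= 32)
        return false;                          // already valid
      if (Vd + regs > 32)
        regs = 32 - Vd;
      regs = std::min(16u, std::max(1u, regs));
      return true;
    }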
@@ -1797,6 +1759,29 @@ static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
return S;
}
+static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if (pred == 0xF)
+ return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+ return S;
+}
+
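DecodeQADDInstruction, like the other decoders here, pulls each operand out of fixed bit positions with fieldFromInstruction. That helper is just a shift-and-mask; a minimal equivalent for reference (assumes numBits < 32):

    #include <cstdint>

    // Extract numBits bits starting at startBit from an encoded instruction.
    static uint32_t fieldFrom(uint32_t Insn, unsigned startBit,
                              unsigned numBits) {
      return (Insn >> startBit) & ((1u << numBits) - 1);
    }

    // For QADD: Rd = fieldFrom(Insn, 12, 4), Rm = fieldFrom(Insn, 0, 4),
    // Rn = fieldFrom(Insn, 16, 4), pred = fieldFrom(Insn, 28, 4).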
static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
unsigned Insn,
uint64_t Address, const void *Decoder) {
@@ -1807,6 +1792,7 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
unsigned reglist = fieldFromInstruction(Insn, 0, 16);
if (pred == 0xF) {
+ // Ambiguous with RFE and SRS
switch (Inst.getOpcode()) {
case ARM::LDMDA:
Inst.setOpcode(ARM::RFEDA);
@@ -1857,11 +1843,16 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
Inst.setOpcode(ARM::SRSIB_UPD);
break;
default:
- if (!Check(S, MCDisassembler::Fail)) return MCDisassembler::Fail;
+ return MCDisassembler::Fail;
}
// For stores (which become SRSs), the only operand is the mode.
if (fieldFromInstruction(Insn, 20, 1) == 0) {
+ // Check SRS encoding constraints
+ if (!(fieldFromInstruction(Insn, 22, 1) == 1 &&
+ fieldFromInstruction(Insn, 20, 1) == 0))
+ return MCDisassembler::Fail;
+
Inst.addOperand(
MCOperand::CreateImm(fieldFromInstruction(Insn, 0, 4)));
return S;
@@ -1891,6 +1882,13 @@ static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
DecodeStatus S = MCDisassembler::Success;
+ // This decoder is called from multiple locations that do not first check
+ // that the full encoding is valid.
+ if (fieldFromInstruction(Insn, 5, 1) != 0 ||
+ fieldFromInstruction(Insn, 16, 1) != 0 ||
+ fieldFromInstruction(Insn, 20, 8) != 0x10)
+ return MCDisassembler::Fail;
+
// imod == '01' --> UNPREDICTABLE
// NOTE: Even though this is technically UNPREDICTABLE, we choose to
// return failure here. The '01' imod value is unprintable, so there's
@@ -2432,6 +2430,57 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
return S;
}
+static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned type = fieldFromInstruction(Insn, 8, 4);
+ unsigned align = fieldFromInstruction(Insn, 4, 2);
+ if (type == 6 && (align & 2)) return MCDisassembler::Fail;
+ if (type == 7 && (align & 2)) return MCDisassembler::Fail;
+ if (type == 10 && align == 3) return MCDisassembler::Fail;
+
+ unsigned load = fieldFromInstruction(Insn, 21, 1);
+ return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder)
+ : DecodeVSTInstruction(Inst, Insn, Address, Decoder);
+}
+
+static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
+ if (size == 3) return MCDisassembler::Fail;
+
+ unsigned type = fieldFromInstruction(Insn, 8, 4);
+ unsigned align = fieldFromInstruction(Insn, 4, 2);
+ if (type == 8 && align == 3) return MCDisassembler::Fail;
+ if (type == 9 && align == 3) return MCDisassembler::Fail;
+
+ unsigned load = fieldFromInstruction(Insn, 21, 1);
+ return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder)
+ : DecodeVSTInstruction(Inst, Insn, Address, Decoder);
+}
+
+static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
+ if (size == 3) return MCDisassembler::Fail;
+
+ unsigned align = fieldFromInstruction(Insn, 4, 2);
+ if (align & 2) return MCDisassembler::Fail;
+
+ unsigned load = fieldFromInstruction(Insn, 21, 1);
+ return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder)
+ : DecodeVSTInstruction(Inst, Insn, Address, Decoder);
+}
+
+static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
+ if (size == 3) return MCDisassembler::Fail;
+
+ unsigned load = fieldFromInstruction(Insn, 21, 1);
+ return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder)
+ : DecodeVSTInstruction(Inst, Insn, Address, Decoder);
+}
+
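Each VLDSTn wrapper rejects reserved size/align combinations before handing the encoding to the shared load or store decoder, selected by bit 21. The VLD2/VST2 pre-check, extracted as a standalone sketch (helper name invented):

    #include <cstdint>

    // size == 0b11 is reserved, as is align == 0b11 for the two VLD2/VST2
    // register layouts encoded as types 8 and 9.
    bool isValidVLDST2(uint32_t Insn) {
      unsigned size  = (Insn >> 6) & 0x3;
      unsigned type  = (Insn >> 8) & 0xf;
      unsigned align = (Insn >> 4) & 0x3;
      if (size == 3)
        return false;
      if ((type == 8 || type == 9) && align == 3)
        return false;
      return true;
    }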
static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3536,6 +3585,15 @@ static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val & ~0xf)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (!Val) return MCDisassembler::Fail;
@@ -3551,11 +3609,10 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
unsigned pred = fieldFromInstruction(Insn, 28, 4);
- if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
+ if (Rn == 0xF)
+ S = MCDisassembler::SoftFail;
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
- return MCDisassembler::Fail;
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ if (!Check(S, DecodeGPRPairRegisterClass(Inst, Rt, Address, Decoder)))
return MCDisassembler::Fail;
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3565,7 +3622,6 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder){
DecodeStatus S = MCDisassembler::Success;
@@ -3578,12 +3634,10 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
- if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
- if (Rd == Rn || Rd == Rt || Rd == Rt+1) return MCDisassembler::Fail;
+ if (Rn == 0xF || Rd == Rn || Rd == Rt || Rd == Rt+1)
+ S = MCDisassembler::SoftFail;
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
- return MCDisassembler::Fail;
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ if (!Check(S, DecodeGPRPairRegisterClass(Inst, Rt, Address, Decoder)))
return MCDisassembler::Fail;
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -4453,16 +4507,18 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
unsigned imm = fieldFromInstruction(Insn, 16, 6);
unsigned cmode = fieldFromInstruction(Insn, 8, 4);
+ unsigned op = fieldFromInstruction(Insn, 5, 1);
DecodeStatus S = MCDisassembler::Success;
// VMOVv2f32 is ambiguous with these decodings.
if (!(imm & 0x38) && cmode == 0xF) {
+ if (op == 1) return MCDisassembler::Fail;
Inst.setOpcode(ARM::VMOVv2f32);
return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
}
- if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+ if (!(imm & 0x20)) return MCDisassembler::Fail;
if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -4481,16 +4537,18 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
unsigned imm = fieldFromInstruction(Insn, 16, 6);
unsigned cmode = fieldFromInstruction(Insn, 8, 4);
+ unsigned op = fieldFromInstruction(Insn, 5, 1);
DecodeStatus S = MCDisassembler::Success;
// VMOVv4f32 is ambiguous with these decodings.
if (!(imm & 0x38) && cmode == 0xF) {
+ if (op == 1) return MCDisassembler::Fail;
Inst.setOpcode(ARM::VMOVv4f32);
return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
}
- if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+ if (!(imm & 0x20)) return MCDisassembler::Fail;
if (!Check(S, DecodeQPRRegisterClass(Inst, Vd, Address, Decoder)))
return MCDisassembler::Fail;
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 3bcd083a35..7fef795b23 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -660,8 +660,8 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
uint32_t v = ~MO.getImm();
- int32_t lsb = CountTrailingZeros_32(v);
- int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb;
+ int32_t lsb = countTrailingZeros(v);
+ int32_t width = (32 - countLeadingZeros (v)) - lsb;
assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
O << markup("<imm:") << '#' << lsb << markup(">")
<< ", "
@@ -674,6 +674,12 @@ void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
O << ARM_MB::MemBOptToString(val);
}
+void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_ISB::InstSyncBOptToString(val);
+}
+
void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned ShiftOp = MI->getOperand(OpNum).getImm();
@@ -931,7 +937,7 @@ void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
unsigned Mask = MI->getOperand(OpNum).getImm();
unsigned Firstcond = MI->getOperand(OpNum-1).getImm();
unsigned CondBit0 = Firstcond & 1;
- unsigned NumTZ = CountTrailingZeros_32(Mask);
+ unsigned NumTZ = countTrailingZeros(Mask);
assert(NumTZ <= 3 && "Invalid IT mask!");
for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
bool T = ((Mask >> Pos) & 1) == CondBit0;
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 344104e873..5a6434886c 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -71,6 +71,7 @@ public:
void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printInstSyncBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index 62473b2bfd..b6c85c2e94 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -140,7 +140,7 @@ namespace ARM_AM {
if ((Imm & ~255U) == 0) return 0;
// Use CTZ to compute the rotate amount.
- unsigned TZ = CountTrailingZeros_32(Imm);
+ unsigned TZ = countTrailingZeros(Imm);
// Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
// not 9.
@@ -153,7 +153,7 @@ namespace ARM_AM {
// For values like 0xF000000F, we should ignore the low 6 bits, then
// retry the hunt.
if (Imm & 63U) {
- unsigned TZ2 = CountTrailingZeros_32(Imm & ~63U);
+ unsigned TZ2 = countTrailingZeros(Imm & ~63U);
unsigned RotAmt2 = TZ2 & ~1;
if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
return (32-RotAmt2)&31; // HW rotates right, not left.
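The rotate hunt works because an ARM modified immediate is, by definition, an 8-bit value rotated right by an even amount. A self-contained membership test stating the same property (a sketch; rotl32/isARMSOImm are made-up names):

    #include <cstdint>

    static uint32_t rotl32(uint32_t V, unsigned Amt) {
      Amt &= 31;
      return Amt ? (V << Amt) | (V >> (32 - Amt)) : V;
    }

    // V is encodable iff rotating it left by some even amount leaves an
    // 8-bit value; the rotate field of the encoding is then Amt / 2.
    bool isARMSOImm(uint32_t V) {
      for (unsigned Amt = 0; Amt < 32; Amt += 2)
        if ((rotl32(V, Amt) & ~255u) == 0)
          return true;
      return false;
    }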
@@ -221,7 +221,7 @@ namespace ARM_AM {
if ((Imm & ~255U) == 0) return 0;
// Use CTZ to compute the shift amount.
- return CountTrailingZeros_32(Imm);
+ return countTrailingZeros(Imm);
}
/// isThumbImmShiftedVal - Return true if the specified value can be obtained
@@ -240,7 +240,7 @@ namespace ARM_AM {
if ((Imm & ~65535U) == 0) return 0;
// Use CTZ to compute the shift amount.
- return CountTrailingZeros_32(Imm);
+ return countTrailingZeros(Imm);
}
/// isThumbImm16ShiftedVal - Return true if the specified value can be
@@ -296,7 +296,7 @@ namespace ARM_AM {
/// encoding is possible.
/// See ARM Reference Manual A6.3.2.
static inline int getT2SOImmValRotateVal(unsigned V) {
- unsigned RotAmt = CountLeadingZeros_32(V);
+ unsigned RotAmt = countLeadingZeros(V);
if (RotAmt >= 24)
return -1;
@@ -328,7 +328,7 @@ namespace ARM_AM {
static inline unsigned getT2SOImmValRotate(unsigned V) {
if ((V & ~255U) == 0) return 0;
// Use CTZ to compute the rotate amount.
- unsigned RotAmt = CountTrailingZeros_32(V);
+ unsigned RotAmt = countTrailingZeros(V);
return (32 - RotAmt) & 31;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index e66e985678..8baa3a6ce6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -419,7 +419,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit;
uint32_t imm10Bits = (offset & 0x1FF800) >> 11;
uint32_t imm11Bits = (offset & 0x000007FF);
-
+
uint32_t Binary = 0;
uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits);
uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
@@ -434,8 +434,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// four (see fixup_arm_thumb_cp). The 32-bit immediate value is encoded as
// imm32 = SignExtend(S:I1:I2:imm10H:imm10L:00)
// where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S).
- // The value is encoded into disjoint bit positions in the destination
- // opcode. x = unchanged, I = immediate value bit, S = sign extension bit,
+ // The value is encoded into disjoint bit positions in the destination
+ // opcode. x = unchanged, I = immediate value bit, S = sign extension bit,
// J = either J1 or J2 bit, 0 = zero.
//
// BLX: xxxxxSIIIIIIIIII xxJxJIIIIIIIIII0
@@ -450,10 +450,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit;
uint32_t imm10HBits = (offset & 0xFFC00) >> 10;
uint32_t imm10LBits = (offset & 0x3FF);
-
+
uint32_t Binary = 0;
uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits);
- uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
+ uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
((uint16_t)imm10LBits) << 1);
Binary |= secondHalf << 16;
Binary |= firstHalf;
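The J-bit shuffling above implements the architectural rule I1 = NOT(J1 ^ S), I2 = NOT(J2 ^ S), which inverts to J = NOT(I) ^ S when encoding. A sketch of the field split; the S/I1/I2 bit positions are assumptions inferred from the masks visible in this hunk:

    #include <cstdint>

    struct BLFields { uint32_t S, J1, J2, imm10, imm11; };

    // Split an already-shifted branch offset into the halfword fields of a
    // Thumb2 BL encoding.
    BLFields splitBLOffset(uint32_t offset) {
      BLFields F;
      F.S     = (offset >> 23) & 1;        // assumed sign-bit position
      uint32_t I1 = (offset >> 22) & 1;    // assumed I1/I2 positions
      uint32_t I2 = (offset >> 21) & 1;
      F.J1    = (I1 ^ 1) ^ F.S;            // J = NOT(I) ^ S
      F.J2    = (I2 ^ 1) ^ F.S;
      F.imm10 = (offset & 0x1FF800) >> 11;
      F.imm11 =  offset & 0x0007FF;
      return F;
    }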
@@ -680,8 +680,11 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef
return new DarwinARMAsmBackend(T, TT, CS);
}
- if (TheTriple.isOSWindows())
+#if 0
+ // FIXME: Introduce yet another checker but assert(0).
+ if (TheTriple.isOSBinFormatCOFF())
assert(0 && "Windows not supported on ARM");
+#endif
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
return new ELFARMAsmBackend(T, TT, OSABI);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index de48a0e0f3..ff9917d793 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -161,6 +161,49 @@ namespace ARM_MB {
}
} // namespace ARM_MB
+namespace ARM_ISB {
+ enum InstSyncBOpt {
+ RESERVED_0 = 0,
+ RESERVED_1 = 1,
+ RESERVED_2 = 2,
+ RESERVED_3 = 3,
+ RESERVED_4 = 4,
+ RESERVED_5 = 5,
+ RESERVED_6 = 6,
+ RESERVED_7 = 7,
+ RESERVED_8 = 8,
+ RESERVED_9 = 9,
+ RESERVED_10 = 10,
+ RESERVED_11 = 11,
+ RESERVED_12 = 12,
+ RESERVED_13 = 13,
+ RESERVED_14 = 14,
+ SY = 15
+ };
+
+ inline static const char *InstSyncBOptToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown memory operation");
+ case RESERVED_0: return "#0x0";
+ case RESERVED_1: return "#0x1";
+ case RESERVED_2: return "#0x2";
+ case RESERVED_3: return "#0x3";
+ case RESERVED_4: return "#0x4";
+ case RESERVED_5: return "#0x5";
+ case RESERVED_6: return "#0x6";
+ case RESERVED_7: return "#0x7";
+ case RESERVED_8: return "#0x8";
+ case RESERVED_9: return "#0x9";
+ case RESERVED_10: return "#0xa";
+ case RESERVED_11: return "#0xb";
+ case RESERVED_12: return "#0xc";
+ case RESERVED_13: return "#0xd";
+ case RESERVED_14: return "#0xe";
+ case SY: return "sy";
+ }
+ }
+} // namespace ARM_ISB
+
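Only value 15 has an architectural mnemonic; every reserved encoding prints as a raw immediate. A quick check of the table's contract (assuming the header is reachable under the path used in this tree):

    #include <cassert>
    #include <cstring>
    #include "MCTargetDesc/ARMBaseInfo.h"  // declares ARM_ISB

    void checkISBNames() {
      assert(std::strcmp(ARM_ISB::InstSyncBOptToString(15), "sy") == 0);
      assert(std::strcmp(ARM_ISB::InstSyncBOptToString(3), "#0x3") == 0);
    }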
/// isARMLowRegister - Returns true if the register is a low register (r0-r7).
///
static inline bool isARMLowRegister(unsigned Reg) {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 6c3d247668..679d3c4a85 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -203,7 +203,8 @@ private:
void Reset();
void EmitPersonalityFixup(StringRef Name);
- void CollectUnwindOpcodes();
+ void FlushPendingOffset();
+ void FlushUnwindOpcodes(bool AllowCompactModel0);
void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags,
SectionKind Kind, const MCSymbol &Fn);
@@ -220,13 +221,14 @@ private:
MCSymbol *ExTab;
MCSymbol *FnStart;
const MCSymbol *Personality;
- uint32_t VFPRegSave; // Register mask for {d31-d0}
- uint32_t RegSave; // Register mask for {r15-r0}
- int64_t SPOffset;
- uint16_t FPReg;
- int64_t FPOffset;
+ unsigned PersonalityIndex;
+ unsigned FPReg; // Frame pointer register
+ int64_t FPOffset; // Offset: (final frame pointer) - (initial $sp)
+ int64_t SPOffset; // Offset: (final $sp) - (initial $sp)
+ int64_t PendingOffset; // Offset: (final $sp) - (emitted $sp)
bool UsedFP;
bool CantUnwind;
+ SmallVector<uint8_t, 64> Opcodes;
UnwindOpcodeAssembler UnwindOpAsm;
};
} // end anonymous namespace
@@ -279,19 +281,18 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
}
void ARMELFStreamer::Reset() {
- const MCRegisterInfo &MRI = getContext().getRegisterInfo();
-
ExTab = NULL;
FnStart = NULL;
Personality = NULL;
- VFPRegSave = 0;
- RegSave = 0;
- FPReg = MRI.getEncodingValue(ARM::SP);
+ PersonalityIndex = NUM_PERSONALITY_INDEX;
+ FPReg = ARM::SP;
FPOffset = 0;
SPOffset = 0;
+ PendingOffset = 0;
UsedFP = false;
CantUnwind = false;
+ Opcodes.clear();
UnwindOpAsm.Reset();
}
@@ -311,18 +312,6 @@ void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
MCFixup::getKindForSize(4, false)));
}
-void ARMELFStreamer::CollectUnwindOpcodes() {
- if (UsedFP) {
- UnwindOpAsm.EmitSetFP(FPReg);
- UnwindOpAsm.EmitSPOffset(-FPOffset);
- } else {
- UnwindOpAsm.EmitSPOffset(SPOffset);
- }
- UnwindOpAsm.EmitVFPRegSave(VFPRegSave);
- UnwindOpAsm.EmitRegSave(RegSave);
- UnwindOpAsm.Finalize();
-}
-
void ARMELFStreamer::EmitFnStart() {
assert(FnStart == 0);
FnStart = getContext().CreateTempSymbol();
@@ -333,27 +322,12 @@ void ARMELFStreamer::EmitFnEnd() {
assert(FnStart && ".fnstart must precede .fnend");
// Emit unwind opcodes if there is no .handlerdata directive
- if (!ExTab && !CantUnwind) {
- CollectUnwindOpcodes();
-
- unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex();
- if (PersonalityIndex == AEABI_UNWIND_CPP_PR1 ||
- PersonalityIndex == AEABI_UNWIND_CPP_PR2) {
- // For the __aeabi_unwind_cpp_pr1 and __aeabi_unwind_cpp_pr2, we have to
- // emit the unwind opcodes in the corresponding ".ARM.extab" section, and
- // then emit a reference to these unwind opcodes in the second word of
- // the exception index table entry.
- SwitchToExTabSection(*FnStart);
- ExTab = getContext().CreateTempSymbol();
- EmitLabel(ExTab);
- EmitBytes(UnwindOpAsm.data(), 0);
- }
- }
+ if (!ExTab && !CantUnwind)
+ FlushUnwindOpcodes(true);
// Emit the exception index table entry
SwitchToExIdxSection(*FnStart);
- unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex();
if (PersonalityIndex < NUM_PERSONALITY_INDEX)
EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex));
@@ -379,11 +353,15 @@ void ARMELFStreamer::EmitFnEnd() {
// opcodes should always be 4 bytes.
assert(PersonalityIndex == AEABI_UNWIND_CPP_PR0 &&
"Compact model must use __aeabi_cpp_unwind_pr0 as personality");
- assert(UnwindOpAsm.size() == 4u &&
+ assert(Opcodes.size() == 4u &&
"Unwind opcode size for __aeabi_cpp_unwind_pr0 must be equal to 4");
- EmitBytes(UnwindOpAsm.data(), 0);
+ EmitBytes(StringRef(reinterpret_cast<const char*>(Opcodes.data()),
+ Opcodes.size()), 0);
}
+ // Switch to the section containing FnStart
+ SwitchSection(&FnStart->getSection());
+
// Clean exception handling frame information
Reset();
}
@@ -392,7 +370,34 @@ void ARMELFStreamer::EmitCantUnwind() {
CantUnwind = true;
}
-void ARMELFStreamer::EmitHandlerData() {
+void ARMELFStreamer::FlushPendingOffset() {
+ if (PendingOffset != 0) {
+ UnwindOpAsm.EmitSPOffset(-PendingOffset);
+ PendingOffset = 0;
+ }
+}
+
+void ARMELFStreamer::FlushUnwindOpcodes(bool AllowCompactModel0) {
+ // Emit the unwind opcode to restore $sp.
+ if (UsedFP) {
+ const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+ int64_t LastRegSaveSPOffset = SPOffset - PendingOffset;
+ UnwindOpAsm.EmitSPOffset(LastRegSaveSPOffset - FPOffset);
+ UnwindOpAsm.EmitSetSP(MRI.getEncodingValue(FPReg));
+ } else {
+ FlushPendingOffset();
+ }
+
+ // Finalize the unwind opcode sequence
+ UnwindOpAsm.Finalize(PersonalityIndex, Opcodes);
+
+ // For compact model 0, we have to emit the unwind opcodes in the .ARM.exidx
+ // section. Thus, we don't have to create an entry in the .ARM.extab
+ // section.
+ if (AllowCompactModel0 && PersonalityIndex == AEABI_UNWIND_CPP_PR0)
+ return;
+
+ // Switch to .ARM.extab section.
SwitchToExTabSection(*FnStart);
// Create .ARM.extab label for offset in .ARM.exidx
@@ -400,19 +405,23 @@ void ARMELFStreamer::EmitHandlerData() {
ExTab = getContext().CreateTempSymbol();
EmitLabel(ExTab);
- // Emit Personality
- assert(Personality && ".personality directive must preceed .handlerdata");
-
- const MCSymbolRefExpr *PersonalityRef =
- MCSymbolRefExpr::Create(Personality,
- MCSymbolRefExpr::VK_ARM_PREL31,
- getContext());
+ // Emit personality
+ if (Personality) {
+ const MCSymbolRefExpr *PersonalityRef =
+ MCSymbolRefExpr::Create(Personality,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
- EmitValue(PersonalityRef, 4, 0);
+ EmitValue(PersonalityRef, 4, 0);
+ }
// Emit unwind opcodes
- CollectUnwindOpcodes();
- EmitBytes(UnwindOpAsm.data(), 0);
+ EmitBytes(StringRef(reinterpret_cast<const char *>(Opcodes.data()),
+ Opcodes.size()), 0);
+}
+
+void ARMELFStreamer::EmitHandlerData() {
+ FlushUnwindOpcodes(false);
}
void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) {
@@ -423,42 +432,55 @@ void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) {
void ARMELFStreamer::EmitSetFP(unsigned NewFPReg,
unsigned NewSPReg,
int64_t Offset) {
- assert(SPOffset == 0 &&
- "Current implementation assumes .setfp precedes .pad");
-
- const MCRegisterInfo &MRI = getContext().getRegisterInfo();
-
- uint16_t NewFPRegEncVal = MRI.getEncodingValue(NewFPReg);
-#ifndef NDEBUG
- uint16_t NewSPRegEncVal = MRI.getEncodingValue(NewSPReg);
-#endif
-
- assert((NewSPReg == ARM::SP || NewSPRegEncVal == FPReg) &&
+ assert((NewSPReg == ARM::SP || NewSPReg == FPReg) &&
"the operand of .setfp directive should be either $sp or $fp");
UsedFP = true;
- FPReg = NewFPRegEncVal;
- FPOffset = Offset;
+ FPReg = NewFPReg;
+
+ if (NewSPReg == ARM::SP)
+ FPOffset = SPOffset + Offset;
+ else
+ FPOffset += Offset;
}
void ARMELFStreamer::EmitPad(int64_t Offset) {
- SPOffset += Offset;
+ // Track the change of the $sp offset
+ SPOffset -= Offset;
+
+ // To squash multiple .pad directives, we should delay the unwind opcode
+ // until the .save, .vsave, .handlerdata, or .fnend directives.
+ PendingOffset -= Offset;
}
void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool IsVector) {
+ // Collect the registers in the register list
+ unsigned Count = 0;
+ uint32_t Mask = 0;
const MCRegisterInfo &MRI = getContext().getRegisterInfo();
-
-#ifndef NDEBUG
- unsigned Max = IsVector ? 32 : 16;
-#endif
- uint32_t &RegMask = IsVector ? VFPRegSave : RegSave;
-
for (size_t i = 0; i < RegList.size(); ++i) {
unsigned Reg = MRI.getEncodingValue(RegList[i]);
- assert(Reg < Max && "Register encoded value out of range");
- RegMask |= 1u << Reg;
+ assert(Reg < (IsVector ? 32U : 16U) && "Register out of range");
+ unsigned Bit = (1u << Reg);
+ if ((Mask & Bit) == 0) {
+ Mask |= Bit;
+ ++Count;
+ }
}
+
+ // Track the change of the $sp offset: for the .save directive, the
+ // corresponding push instruction will decrease the $sp by (4 * Count).
+ // For the .vsave directive, the corresponding vpush instruction will
+ // decrease $sp by (8 * Count).
+ SPOffset -= Count * (IsVector ? 8 : 4);
+
+ // Emit the opcode
+ FlushPendingOffset();
+ if (IsVector)
+ UnwindOpAsm.EmitVFPRegSave(Mask);
+ else
+ UnwindOpAsm.EmitRegSave(Mask);
}
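The streamer now does its own virtual-SP bookkeeping: each saved core register moves $sp by 4 bytes, each D register by 8, and duplicate list entries are counted once via the mask. That arithmetic in isolation (a sketch over encoded register values; the function name is invented):

    #include <cstdint>

    // Returns the $sp delta the matching push/vpush performs for a register
    // list; duplicates collapse through the mask.
    int64_t regSaveSPDelta(const unsigned *Enc, unsigned N, bool IsVector) {
      uint32_t Mask = 0;
      unsigned Count = 0;
      for (unsigned i = 0; i < N; ++i) {
        uint32_t Bit = 1u << Enc[i];
        if (!(Mask & Bit)) { Mask |= Bit; ++Count; }
      }
      return -int64_t(Count) * (IsVector ? 8 : 4);  // $sp decreases
    }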
namespace llvm {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 7a59a7dd50..2aa1010217 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1359,8 +1359,8 @@ getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
// msb of the mask.
const MCOperand &MO = MI.getOperand(Op);
uint32_t v = ~MO.getImm();
- uint32_t lsb = CountTrailingZeros_32(v);
- uint32_t msb = (32 - CountLeadingZeros_32 (v)) - 1;
+ uint32_t lsb = countTrailingZeros(v);
+ uint32_t msb = (32 - countLeadingZeros (v)) - 1;
assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
return lsb | (msb << 5);
}
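The printer and the encoder recover (lsb, msb) from the inverted bf_inv_mask_imm operand the same way: with v = ~imm nonzero (the assert above), lsb = ctz(v) and msb = 31 - clz(v). A worked sketch using the GCC/Clang builtins that back countTrailingZeros/countLeadingZeros:

    #include <cstdint>

    // Pack the BFC/BFI operand as lsb | (msb << 5); assumes ~InvMaskImm != 0.
    unsigned encodeBitfieldMask(uint32_t InvMaskImm) {
      uint32_t v = ~InvMaskImm;
      unsigned lsb = __builtin_ctz(v);       // countTrailingZeros(v)
      unsigned msb = 31 - __builtin_clz(v);  // (32 - countLeadingZeros(v)) - 1
      return lsb | (msb << 5);
    }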
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index f09fb5a94f..14fd03fad8 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -61,6 +61,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
unsigned SubVer = TT[Idx];
if (SubVer >= '7' && SubVer <= '9') {
if (Len >= Idx+2 && TT[Idx+1] == 'm') {
+ isThumb = true;
if (NoCPU)
// v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
@@ -99,6 +100,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
ARMArchFeature = "+v6t2";
else if (Len >= Idx+2 && TT[Idx+1] == 'm') {
+ isThumb = true;
if (NoCPU)
// v6m: FeatureNoARM, FeatureMClass
ARMArchFeature = "+v6,+noarm,+mclass";
@@ -159,7 +161,7 @@ static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) {
return X;
}
-static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
+static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin())
@@ -212,6 +214,15 @@ static MCInstPrinter *createARMMCInstPrinter(const Target &T,
return 0;
}
+static MCRelocationInfo *createARMMCRelocationInfo(StringRef TT,
+ MCContext &Ctx) {
+ Triple TheTriple(TT);
+ if (TheTriple.isEnvironmentMachO())
+ return createARMMachORelocationInfo(Ctx);
+ // Default to the stock relocation info.
+ return llvm::createMCRelocationInfo(TT, Ctx);
+}
+
namespace {
class ARMMCInstrAnalysis : public MCInstrAnalysis {
@@ -232,15 +243,16 @@ public:
return MCInstrAnalysis::isConditionalBranch(Inst);
}
- uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
- uint64_t Size) const {
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size, uint64_t &Target) const {
// We only handle PCRel branches for now.
if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL)
- return -1ULL;
+ return false;
int64_t Imm = Inst.getOperand(0).getImm();
// FIXME: This is not right for thumb.
- return Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes.
+ Target = Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes.
+ return true;
}
};
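The constant folded into the target is the ARM-mode pipeline offset: PC reads as the instruction address plus 8, so a PC-relative branch lands at Addr + Imm + 8 (Thumb differs, as the FIXME notes). In isolation:

    #include <cstdint>

    uint64_t armModeBranchTarget(uint64_t Addr, int64_t Imm) {
      return Addr + Imm + 8;   // ARM mode only; Thumb reads PC as Addr + 4
    }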
@@ -295,4 +307,10 @@ extern "C" void LLVMInitializeARMTargetMC() {
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter);
+
+ // Register the MC relocation info.
+ TargetRegistry::RegisterMCRelocationInfo(TheARMTarget,
+ createARMMCRelocationInfo);
+ TargetRegistry::RegisterMCRelocationInfo(TheThumbTarget,
+ createARMMCRelocationInfo);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index a89981e4f0..4e94c5341b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -25,6 +25,7 @@ class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
+class MCRelocationInfo;
class StringRef;
class Target;
class raw_ostream;
@@ -58,6 +59,9 @@ MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
uint32_t CPUType,
uint32_t CPUSubtype);
+
+/// createARMMachORelocationInfo - Construct ARM Mach-O relocation info.
+MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx);
} // End llvm namespace
// Defines symbolic names for ARM registers. This defines a mapping from
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
new file mode 100644
index 0000000000..807c9483bc
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
@@ -0,0 +1,43 @@
+//===-- ARMMachORelocationInfo.cpp ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "ARMMCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRelocationInfo.h"
+#include "llvm-c/Disassembler.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+class ARMMachORelocationInfo : public MCRelocationInfo {
+public:
+ ARMMachORelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {}
+
+ const MCExpr *createExprForCAPIVariantKind(const MCExpr *SubExpr,
+ unsigned VariantKind) {
+ switch(VariantKind) {
+ case LLVMDisassembler_VariantKind_ARM_HI16:
+ return ARMMCExpr::CreateUpper16(SubExpr, Ctx);
+ case LLVMDisassembler_VariantKind_ARM_LO16:
+ return ARMMCExpr::CreateLower16(SubExpr, Ctx);
+ default:
+ return MCRelocationInfo::createExprForCAPIVariantKind(SubExpr,
+ VariantKind);
+ }
+ }
+};
+} // End unnamed namespace
+
+/// createARMMachORelocationInfo - Construct an ARM Mach-O RelocationInfo.
+MCRelocationInfo *llvm::createARMMachORelocationInfo(MCContext &Ctx) {
+ return new ARMMachORelocationInfo(Ctx);
+}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 191db69fbc..c943370818 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -20,6 +20,48 @@
using namespace llvm;
+namespace {
+ /// UnwindOpcodeStreamer - A simple wrapper over SmallVector that emits
+ /// bytes in MSB-to-LSB order within each uint32_t word. For example, the
+ /// first byte is placed in Vec[3], and the following bytes go to 2, 1, 0,
+ /// 7, 6, 5, 4, 11, 10, 9, 8, and so on.
+ class UnwindOpcodeStreamer {
+ private:
+ SmallVectorImpl<uint8_t> &Vec;
+ size_t Pos;
+
+ public:
+ UnwindOpcodeStreamer(SmallVectorImpl<uint8_t> &V) : Vec(V), Pos(3) {
+ }
+
+ /// Emit one byte, in MSB-to-LSB order within each uint32_t word.
+ inline void EmitByte(uint8_t elem) {
+ Vec[Pos] = elem;
+ Pos = (((Pos ^ 0x3u) + 1) ^ 0x3u);
+ }
+
+ /// Emit the size prefix.
+ inline void EmitSize(size_t Size) {
+ size_t SizeInWords = (Size + 3) / 4;
+ assert(SizeInWords <= 0x100u &&
+ "Only 256 additional words are allowed for unwind opcodes");
+ EmitByte(static_cast<uint8_t>(SizeInWords - 1));
+ }
+
+ /// Emit the personality index prefix.
+ inline void EmitPersonalityIndex(unsigned PI) {
+ assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix");
+ EmitByte(EHT_COMPACT | PI);
+ }
+
+ /// Fill the remaining bytes with the FINISH opcode.
+ inline void FillFinishOpcode() {
+ while (Pos < Vec.size())
+ EmitByte(UNWIND_OPCODE_FINISH);
+ }
+ };
+}
+
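The Pos update ((Pos ^ 3) + 1) ^ 3 walks each word's bytes backwards (3, 2, 1, 0, then 7, 6, 5, 4), which is what produces the MSB-first layout. A few lines verifying that order:

    #include <cassert>

    void checkByteOrder() {
      unsigned Pos = 3;
      const unsigned Expect[8] = {3, 2, 1, 0, 7, 6, 5, 4};
      for (unsigned i = 0; i < 8; ++i) {
        assert(Pos == Expect[i]);
        Pos = ((Pos ^ 3u) + 1u) ^ 3u;
      }
    }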
void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
if (RegSave == 0u)
return;
@@ -43,28 +85,22 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask);
if (UnmaskedReg == 0u) {
// Pop r[4 : (4 + n)]
- Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range);
+ EmitInt8(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range);
RegSave &= 0x000fu;
} else if (UnmaskedReg == (1u << 14)) {
// Pop r[14] + r[4 : (4 + n)]
- Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range);
+ EmitInt8(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range);
RegSave &= 0x000fu;
}
}
// Two-byte opcode to save registers r15-r4
- if ((RegSave & 0xfff0u) != 0) {
- uint32_t Op = UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4);
- Ops.push_back(static_cast<uint8_t>(Op >> 8));
- Ops.push_back(static_cast<uint8_t>(Op & 0xff));
- }
+ if ((RegSave & 0xfff0u) != 0)
+ EmitInt16(UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4));
// Opcode to save registers r3-r0
- if ((RegSave & 0x000fu) != 0) {
- uint32_t Op = UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu);
- Ops.push_back(static_cast<uint8_t>(Op >> 8));
- Ops.push_back(static_cast<uint8_t>(Op & 0xff));
- }
+ if ((RegSave & 0x000fu) != 0)
+ EmitInt16(UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu));
}
/// Emit unwind opcodes for .vsave directives
@@ -89,10 +125,8 @@ void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) {
Bit >>= 1;
}
- uint32_t Op =
- UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | ((i - 16) << 4) | Range;
- Ops.push_back(static_cast<uint8_t>(Op >> 8));
- Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+ EmitInt16(UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 |
+ ((i - 16) << 4) | Range);
}
while (i > 0) {
@@ -113,86 +147,75 @@ void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) {
Bit >>= 1;
}
- uint32_t Op = UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range;
- Ops.push_back(static_cast<uint8_t>(Op >> 8));
- Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+ EmitInt16(UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range);
}
}
-/// Emit unwind opcodes for .setfp directives
-void UnwindOpcodeAssembler::EmitSetFP(uint16_t FPReg) {
- Ops.push_back(UNWIND_OPCODE_SET_VSP | FPReg);
+/// Emit unwind opcodes to copy address from source register to $sp.
+void UnwindOpcodeAssembler::EmitSetSP(uint16_t Reg) {
+ EmitInt8(UNWIND_OPCODE_SET_VSP | Reg);
}
-/// Emit unwind opcodes to update stack pointer
+/// Emit unwind opcodes to add $sp with an offset.
void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) {
if (Offset > 0x200) {
- uint8_t Buff[10];
- size_t Size = encodeULEB128((Offset - 0x204) >> 2, Buff);
- Ops.push_back(UNWIND_OPCODE_INC_VSP_ULEB128);
- Ops.append(Buff, Buff + Size);
+ uint8_t Buff[16];
+ Buff[0] = UNWIND_OPCODE_INC_VSP_ULEB128;
+ size_t ULEBSize = encodeULEB128((Offset - 0x204) >> 2, Buff + 1);
+ EmitBytes(Buff, ULEBSize + 1);
} else if (Offset > 0) {
if (Offset > 0x100) {
- Ops.push_back(UNWIND_OPCODE_INC_VSP | 0x3fu);
+ EmitInt8(UNWIND_OPCODE_INC_VSP | 0x3fu);
Offset -= 0x100;
}
- Ops.push_back(UNWIND_OPCODE_INC_VSP |
- static_cast<uint8_t>((Offset - 4) >> 2));
+ EmitInt8(UNWIND_OPCODE_INC_VSP | static_cast<uint8_t>((Offset - 4) >> 2));
} else if (Offset < 0) {
while (Offset < -0x100) {
- Ops.push_back(UNWIND_OPCODE_DEC_VSP | 0x3fu);
+ EmitInt8(UNWIND_OPCODE_DEC_VSP | 0x3fu);
Offset += 0x100;
}
- Ops.push_back(UNWIND_OPCODE_DEC_VSP |
- static_cast<uint8_t>(((-Offset) - 4) >> 2));
+ EmitInt8(UNWIND_OPCODE_DEC_VSP |
+ static_cast<uint8_t>(((-Offset) - 4) >> 2));
}
}
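EmitSPOffset picks an encoding by range: offsets above 0x200 take the ULEB128 form biased by 0x204, positive offsets up to 0x200 fit one or two INC_VSP opcodes, and negative offsets chain DEC_VSP opcodes downwards. A sketch of the classification (opcode bytes elided; names invented):

    #include <cstdint>

    enum class SPForm { IncULEB, Inc, Dec };

    // Which branch of EmitSPOffset a nonzero, 4-byte-aligned offset takes;
    // Payload is the biased value fed to the final opcode of that branch.
    SPForm classifySPOffset(int64_t Offset, uint64_t &Payload) {
      if (Offset > 0x200) {
        Payload = uint64_t(Offset - 0x204) >> 2;  // ULEB128 operand
        return SPForm::IncULEB;
      }
      if (Offset > 0) {
        Payload = uint64_t(Offset > 0x100 ? Offset - 0x100 - 4
                                          : Offset - 4) >> 2;
        return SPForm::Inc;
      }
      int64_t Off = Offset;
      while (Off < -0x100)
        Off += 0x100;                     // each step is one DEC_VSP | 0x3f
      Payload = uint64_t(-Off - 4) >> 2;
      return SPForm::Dec;
    }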
-void UnwindOpcodeAssembler::AddOpcodeSizePrefix(size_t Pos) {
- size_t SizeInWords = (size() + 3) / 4;
- assert(SizeInWords <= 0x100u &&
- "Only 256 additional words are allowed for unwind opcodes");
- Ops[Pos] = static_cast<uint8_t>(SizeInWords - 1);
-}
+void UnwindOpcodeAssembler::Finalize(unsigned &PersonalityIndex,
+ SmallVectorImpl<uint8_t> &Result) {
-void UnwindOpcodeAssembler::AddPersonalityIndexPrefix(size_t Pos, unsigned PI) {
- assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix");
- Ops[Pos] = EHT_COMPACT | PI;
-}
+ UnwindOpcodeStreamer OpStreamer(Result);
-void UnwindOpcodeAssembler::EmitFinishOpcodes() {
- for (size_t i = (0x4u - (size() & 0x3u)) & 0x3u; i > 0; --i)
- Ops.push_back(UNWIND_OPCODE_FINISH);
-}
-
-void UnwindOpcodeAssembler::Finalize() {
if (HasPersonality) {
- // Personality specified by .personality directive
- Offset = 1;
- AddOpcodeSizePrefix(1);
+ // User-specified personality routine: [ SIZE , OP1 , OP2 , ... ]
+ PersonalityIndex = NUM_PERSONALITY_INDEX;
+ size_t TotalSize = Ops.size() + 1;
+ size_t RoundUpSize = (TotalSize + 3) / 4 * 4;
+ Result.resize(RoundUpSize);
+ OpStreamer.EmitSize(RoundUpSize);
} else {
- if (getOpcodeSize() <= 3) {
+ if (Ops.size() <= 3) {
// __aeabi_unwind_cpp_pr0: [ 0x80 , OP1 , OP2 , OP3 ]
- Offset = 1;
PersonalityIndex = AEABI_UNWIND_CPP_PR0;
- AddPersonalityIndexPrefix(Offset, PersonalityIndex);
+ Result.resize(4);
+ OpStreamer.EmitPersonalityIndex(PersonalityIndex);
} else {
// __aeabi_unwind_cpp_pr1: [ 0x81 , SIZE , OP1 , OP2 , ... ]
- Offset = 0;
PersonalityIndex = AEABI_UNWIND_CPP_PR1;
- AddPersonalityIndexPrefix(Offset, PersonalityIndex);
- AddOpcodeSizePrefix(1);
+ size_t TotalSize = Ops.size() + 2;
+ size_t RoundUpSize = (TotalSize + 3) / 4 * 4;
+ Result.resize(RoundUpSize);
+ OpStreamer.EmitPersonalityIndex(PersonalityIndex);
+ OpStreamer.EmitSize(RoundUpSize);
}
}
- // Emit the padding finish opcodes if the size() is not multiple of 4.
- EmitFinishOpcodes();
+ // Copy the unwind opcodes
+ for (size_t i = OpBegins.size() - 1; i > 0; --i)
+ for (size_t j = OpBegins[i - 1], end = OpBegins[i]; j < end; ++j)
+ OpStreamer.EmitByte(Ops[j]);
- // Swap the byte order
- uint8_t *Ptr = Ops.begin() + Offset;
- assert(size() % 4 == 0 && "Final unwind opcodes should align to 4");
- for (size_t i = 0, n = size(); i < n; i += 4) {
- std::swap(Ptr[i], Ptr[i + 3]);
- std::swap(Ptr[i + 1], Ptr[i + 2]);
- }
+ // Emit the padding finish opcodes if the size is not a multiple of 4.
+ OpStreamer.FillFinishOpcode();
+
+ // Reset the assembler state
+ Reset();
}
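Finalize's layout choice reduces to one comparison: at most three opcode bytes fit in the compact PR0 word, anything longer selects PR1 with a size byte, and a user personality always gets the size-prefixed form. The size arithmetic, extracted as a sketch:

    #include <cstddef>

    // Finalized buffer size in bytes, mirroring the three cases above.
    size_t finalizedSize(size_t OpBytes, bool HasPersonality, bool &IsPR0) {
      IsPR0 = false;
      if (HasPersonality)
        return (OpBytes + 1 + 3) / 4 * 4;  // [ SIZE, OP... ], word-aligned
      if (OpBytes <= 3) {
        IsPR0 = true;
        return 4;                          // [ 0x80|PR0, OP1, OP2, OP3 ]
      }
      return (OpBytes + 2 + 3) / 4 * 4;    // [ 0x81|PR1, SIZE, OP... ]
    }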
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
index f6ecaeb8b2..ac67c6efab 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -27,86 +27,61 @@ class MCSymbol;
class UnwindOpcodeAssembler {
private:
- llvm::SmallVector<uint8_t, 8> Ops;
-
- unsigned Offset;
- unsigned PersonalityIndex;
+ llvm::SmallVector<uint8_t, 32> Ops;
+ llvm::SmallVector<unsigned, 8> OpBegins;
bool HasPersonality;
- enum {
- // The number of bytes to be preserved for the size and personality index
- // prefix of unwind opcodes.
- NUM_PRESERVED_PREFIX_BUF = 2
- };
-
public:
UnwindOpcodeAssembler()
- : Ops(NUM_PRESERVED_PREFIX_BUF), Offset(NUM_PRESERVED_PREFIX_BUF),
- PersonalityIndex(NUM_PERSONALITY_INDEX), HasPersonality(0) {
+ : HasPersonality(0) {
+ OpBegins.push_back(0);
}
/// Reset the unwind opcode assembler.
void Reset() {
- Ops.resize(NUM_PRESERVED_PREFIX_BUF);
- Offset = NUM_PRESERVED_PREFIX_BUF;
- PersonalityIndex = NUM_PERSONALITY_INDEX;
+ Ops.clear();
+ OpBegins.clear();
+ OpBegins.push_back(0);
HasPersonality = 0;
}
- /// Get the size of the payload (including the size byte)
- size_t size() const {
- return Ops.size() - Offset;
- }
-
- /// Get the beginning of the payload
- const uint8_t *begin() const {
- return Ops.begin() + Offset;
- }
-
- /// Get the payload
- StringRef data() const {
- return StringRef(reinterpret_cast<const char *>(begin()), size());
- }
-
/// Set the personality index
void setPersonality(const MCSymbol *Per) {
HasPersonality = 1;
}
- /// Get the personality index
- unsigned getPersonalityIndex() const {
- return PersonalityIndex;
- }
-
/// Emit unwind opcodes for .save directives
void EmitRegSave(uint32_t RegSave);
/// Emit unwind opcodes for .vsave directives
void EmitVFPRegSave(uint32_t VFPRegSave);
- /// Emit unwind opcodes for .setfp directives
- void EmitSetFP(uint16_t FPReg);
+ /// Emit unwind opcodes to copy address from source register to $sp.
+ void EmitSetSP(uint16_t Reg);
- /// Emit unwind opcodes to update stack pointer
+ /// Emit unwind opcodes to add $sp with an offset.
void EmitSPOffset(int64_t Offset);
/// Finalize the unwind opcode sequence for EmitBytes()
- void Finalize();
+ void Finalize(unsigned &PersonalityIndex,
+ SmallVectorImpl<uint8_t> &Result);
private:
- /// Get the size of the opcodes in bytes.
- size_t getOpcodeSize() const {
- return Ops.size() - NUM_PRESERVED_PREFIX_BUF;
+ void EmitInt8(unsigned Opcode) {
+ Ops.push_back(Opcode & 0xff);
+ OpBegins.push_back(OpBegins.back() + 1);
}
- /// Add the length prefix to the payload
- void AddOpcodeSizePrefix(size_t Pos);
-
- /// Add personality index prefix in some compact format
- void AddPersonalityIndexPrefix(size_t Pos, unsigned PersonalityIndex);
+ void EmitInt16(unsigned Opcode) {
+ Ops.push_back((Opcode >> 8) & 0xff);
+ Ops.push_back(Opcode & 0xff);
+ OpBegins.push_back(OpBegins.back() + 2);
+ }
- /// Fill the words with finish opcode if it is not aligned
- void EmitFinishOpcodes();
+ void EmitBytes(const uint8_t *Opcode, size_t Size) {
+ Ops.insert(Ops.end(), Opcode, Opcode + Size);
+ OpBegins.push_back(OpBegins.back() + Size);
+ }
};
} // namespace llvm
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index a7ac5ca061..bab59f41c9 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_library(LLVMARMDesc
ARMMachObjectWriter.cpp
ARMELFObjectWriter.cpp
ARMUnwindOpAsm.cpp
+ ARMMachORelocationInfo.cpp
)
add_dependencies(LLVMARMDesc ARMCommonTableGen)
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 1e2a8b03e1..db49db8ca3 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -88,7 +88,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+ unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -249,7 +250,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+ unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
int NumBytes = (int)MFI->getStackSize();
const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 095736d52a..22a925e0ff 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -22,7 +22,7 @@
using namespace llvm;
Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(*this, STI) {
+ : ARMBaseInstrInfo(STI), RI(STI) {
}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 7452fb776e..6722614027 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -40,9 +40,8 @@ extern cl::opt<bool> ReuseFrameIndexVals;
using namespace llvm;
-Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii,
- const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(tii, sti) {
+Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(sti) {
}
const TargetRegisterClass*
@@ -70,6 +69,7 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
@@ -488,6 +488,9 @@ void
Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
unsigned BaseReg, int64_t Offset) const {
MachineInstr &MI = *I;
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(
+ MI.getParent()->getParent()->getTarget().getInstrInfo());
int Off = Offset; // ARM doesn't need the general 64-bit offsets
unsigned i = 0;
@@ -513,6 +516,7 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
// off the frame pointer (if, for example, there are alloca() calls in
// the function, the offset will be negative. Use R12 instead since that's
// a call clobbered register that we know won't be used in Thumb1 mode.
+ const TargetInstrInfo &TII = *MBB.getParent()->getTarget().getInstrInfo();
DebugLoc DL;
AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
.addReg(ARM::R12, RegState::Define)
@@ -558,6 +562,8 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
MachineInstrBuilder MIB(*MBB.getParent(), &MI);
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index ebbab36dd7..9689b23146 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -25,7 +25,7 @@ namespace llvm {
struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
public:
- Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+ Thumb1RegisterInfo(const ARMSubtarget &STI);
const TargetRegisterClass*
getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 97c254ce75..d8596d7993 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -73,15 +73,15 @@ static void TrackDefUses(MachineInstr *MI,
for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
unsigned Reg = LocalUses[i];
- Uses.insert(Reg);
- for (MCSubRegIterator Subreg(Reg, TRI); Subreg.isValid(); ++Subreg)
+ for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true);
+ Subreg.isValid(); ++Subreg)
Uses.insert(*Subreg);
}
for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
unsigned Reg = LocalDefs[i];
- Defs.insert(Reg);
- for (MCSubRegIterator Subreg(Reg, TRI); Subreg.isValid(); ++Subreg)
+ for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true);
+ Subreg.isValid(); ++Subreg)
Defs.insert(*Subreg);
if (Reg == ARM::CPSR)
continue;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index a1b48c226a..286eaa0946 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -31,7 +31,7 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
cl::init(false));
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(*this, STI) {
+ : ARMBaseInstrInfo(STI), RI(STI) {
}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
@@ -285,7 +285,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
NumBytes = 0;
} else {
// FIXME: Move this to ARMAddressingModes.h?
- unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ unsigned RotAmt = countLeadingZeros(ThisVal);
ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
NumBytes &= ~ThisVal;
assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
@@ -302,7 +302,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
NumBytes = 0;
} else {
// FIXME: Move this to ARMAddressingModes.h?
- unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ unsigned RotAmt = countLeadingZeros(ThisVal);
ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
NumBytes &= ~ThisVal;
assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
@@ -484,7 +484,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Otherwise, extract 8 adjacent bits from the immediate into this
// t2ADDri/t2SUBri.
- unsigned RotAmt = CountLeadingZeros_32(Offset);
+ unsigned RotAmt = countLeadingZeros<unsigned>(Offset);
unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xff000000U, RotAmt);
// We will handle these bits from offset, clear them.
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 1a7a4d450c..4cb827f308 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -24,9 +24,8 @@
#include "llvm/IR/Function.h"
using namespace llvm;
-Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
- const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(tii, sti) {
+Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(sti) {
}
/// emitLoadConstPool - Emits a load from constpool to materialize the
@@ -40,6 +39,7 @@ Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index 6b397e8696..b1d63fa86d 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -20,12 +20,12 @@
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
- class ARMSubtarget;
- class ARMBaseInstrInfo;
+
+class ARMSubtarget;
struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
public:
- Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+ Thumb2RegisterInfo(const ARMSubtarget &STI);
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.