Diffstat (limited to 'lib/Target/ARM')
56 files changed, 3080 insertions(+), 1044 deletions(-)
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 2d7470919d..1bc9d6b410 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -38,7 +38,8 @@ def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
 def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
                                      "Enable Thumb2 instructions">;
 def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
-                                    "Does not support ARM mode execution">;
+                                    "Does not support ARM mode execution",
+                                    [ModeThumb]>;
 def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
                                    "Enable half-precision floating point">;
 def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
@@ -59,6 +60,8 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
                                          "FP compare + branch is slow">;
 def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
                          "Floating point unit supports single precision only">;
+def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
+                         "Enable support for Performance Monitor extensions">;
 def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
                          "Enable support for TrustZone security extensions">;
@@ -134,7 +137,7 @@ def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
                                   [HasV6Ops, FeatureThumb2]>;
 def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
                                 "Support ARM v7 instructions",
-                                [HasV6T2Ops]>;
+                                [HasV6T2Ops, FeaturePerfMon]>;
 
 //===----------------------------------------------------------------------===//
 // ARM Processors supported.
@@ -175,7 +178,8 @@ def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
                                 FeatureTrustZone]>;
 def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
                               "Cortex-R5 ARM processors",
-                              [FeatureSlowFPBrcc, FeatureHWDivARM,
+                              [FeatureSlowFPBrcc,
+                               FeatureHWDiv, FeatureHWDivARM,
                                FeatureHasSlowFPVMLx,
                                FeatureAvoidPartialCPSR,
                                FeatureT2XtPk]>;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 0d1417dd17..ad1447503f 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -113,8 +113,7 @@ ScheduleHazardRecognizer *ARMBaseInstrInfo::
 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                    const ScheduleDAG *DAG) const {
   if (Subtarget.isThumb2() || Subtarget.hasVFP2())
-    return (ScheduleHazardRecognizer *)
-      new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
+    return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
   return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
 }
@@ -283,14 +282,20 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
       return false;
     --I;
   }
-  if (!isUnpredicatedTerminator(I))
-    return false;
 
   // Get the last instruction in the block.
   MachineInstr *LastInst = I;
+  unsigned LastOpc = LastInst->getOpcode();
+
+  // Check if it's an indirect branch first, this should return 'unanalyzable'
+  // even if it's predicated.
+  if (isIndirectBranchOpcode(LastOpc))
+    return true;
+
+  if (!isUnpredicatedTerminator(I))
+    return false;
 
   // If there is only one terminator instruction, process it.
-  unsigned LastOpc = LastInst->getOpcode();
   if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
     if (isUncondBranchOpcode(LastOpc)) {
       TBB = LastInst->getOperand(0).getMBB();
@@ -4146,6 +4151,8 @@ bool ARMBaseInstrInfo::hasNOP() const {
 }
 
 bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
+  if (MI->getNumOperands() < 4)
+    return true;
   unsigned ShOpVal = MI->getOperand(3).getImm();
   unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
   // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 2ef659c23b..4ca3d7b411 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -46,7 +46,7 @@ public:
                                       MachineBasicBlock::iterator &MBBI,
                                       LiveVariables *LV) const;
 
-  virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
+  virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0;
   const ARMSubtarget &getSubtarget() const { return Subtarget; }
 
   ScheduleHazardRecognizer *
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index b0d34a76b0..7c03055808 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -43,9 +43,8 @@
 using namespace llvm;
 
-ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
-                                         const ARMSubtarget &sti)
-  : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), TII(tii), STI(sti),
+ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
+  : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti),
     FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
     BasePtr(ARM::R6) {
 }
@@ -94,6 +93,7 @@ getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(ARM::SP);
   Reserved.set(ARM::PC);
   Reserved.set(ARM::FPSCR);
+  Reserved.set(ARM::APSR_NZCV);
   if (TFI->hasFP(MF))
     Reserved.set(FramePtr);
   if (hasBasePointer(MF))
@@ -375,6 +375,7 @@ emitLoadConstPool(MachineBasicBlock &MBB,
                   ARMCC::CondCodes Pred,
                   unsigned PredReg, unsigned MIFlags) const {
   MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   MachineConstantPool *ConstantPool = MF.getConstantPool();
   const Constant *C =
         ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val);
@@ -556,9 +557,10 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
   if (Ins != MBB->end())
     DL = Ins->getDebugLoc();
 
-  const MCInstrDesc &MCID = TII.get(ADDriOpc);
-  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
   const MachineFunction &MF = *MBB->getParent();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  const MCInstrDesc &MCID = TII.get(ADDriOpc);
   MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
 
   MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg)
@@ -574,6 +576,8 @@ ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
   MachineInstr &MI = *I;
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  const ARMBaseInstrInfo &TII =
+      *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   int Off = Offset; // ARM doesn't need the general 64-bit offsets
   unsigned i = 0;
@@ -671,6 +675,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  const ARMBaseInstrInfo &TII =
+      *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
   const ARMFrameLowering *TFI =
     static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering());
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 0679919152..03b3682541 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -74,7 +74,6 @@ static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
 
 class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
 protected:
-  const ARMBaseInstrInfo &TII;
   const ARMSubtarget &STI;
 
   /// FramePtr - ARM physical register used as frame ptr.
@@ -86,8 +85,7 @@ protected:
   unsigned BasePtr;
 
   // Can be only subclassed.
-  explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
-                               const ARMSubtarget &STI);
+  explicit ARMBaseRegisterInfo(const ARMSubtarget &STI);
 
   // Return the opcode that implements 'Op', or 0 if no opcode
   unsigned getOpcode(int Op) const;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 95decfe7d3..4a157d7430 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -1137,8 +1137,8 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
     return;
   } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) {
     uint32_t v = ~MI.getOperand(2).getImm();
-    int32_t lsb = CountTrailingZeros_32(v);
-    int32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
+    int32_t lsb = countTrailingZeros(v);
+    int32_t msb = (32 - countLeadingZeros(v)) - 1;
     // Instr{20-16} = msb, Instr{11-7} = lsb
     Binary |= (msb & 0x1F) << 16;
     Binary |= (lsb & 0x1F) << 7;
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 4891609b33..cff5ce27bc 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -128,7 +128,7 @@ namespace {
       // If the block size isn't a multiple of the known bits, assume the
       // worst case padding.
       if (Size & ((1u << Bits) - 1))
-        Bits = CountTrailingZeros_32(Size);
+        Bits = countTrailingZeros(Size);
       return Bits;
     }
@@ -753,6 +753,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
             Scale = 4;
             break;
 
+          case ARM::LDRBi12:
           case ARM::LDRi12:
           case ARM::LDRcp:
           case ARM::t2LDRpci:
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 5d45f64912..a4de941689 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -20,6 +20,7 @@
 #include "ARMSubtarget.h"
 #include "ARMTargetMachine.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -41,6 +42,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
@@ -1025,7 +1027,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
         useAM3 = true;
       }
     }
-    RC = &ARM::GPRRegClass;
+    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
     break;
   case MVT::i16:
     if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
@@ -1040,7 +1042,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
       Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
       useAM3 = true;
     }
-    RC = &ARM::GPRRegClass;
+    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
     break;
   case MVT::i32:
     if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
@@ -1054,7 +1056,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
     } else {
       Opc = ARM::LDRi12;
     }
-    RC = &ARM::GPRRegClass;
+    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
     break;
   case MVT::f32:
     if (!Subtarget->hasVFP2()) return false;
@@ -1063,7 +1065,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
       needVMOV = true;
      VT = MVT::i32;
       Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
-      RC = &ARM::GPRRegClass;
+      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
     } else {
       Opc = ARM::VLDRS;
       RC = TLI.getRegClassFor(VT);
@@ -1802,7 +1804,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
   unsigned SrcReg2 = getRegForValue(I->getOperand(1));
   if (SrcReg2 == 0) return false;
 
-  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+  unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(Opc), ResultReg)
                   .addReg(SrcReg1).addReg(SrcReg2));
@@ -1985,7 +1987,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
       case CCValAssign::ZExt: {
         MVT DestVT = VA.getLocVT();
         Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
-        assert (Arg != 0 && "Failed to emit a sext");
+        assert (Arg != 0 && "Failed to emit a zext");
         ArgVT = DestVT;
         break;
       }
@@ -2602,47 +2604,112 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                     bool isZExt) {
   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
     return 0;
+  if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
+    return 0;
 
-  unsigned Opc;
-  bool isBoolZext = false;
-  const TargetRegisterClass *RC;
-  switch (SrcVT.SimpleTy) {
-  default: return 0;
-  case MVT::i16:
-    if (!Subtarget->hasV6Ops()) return 0;
-    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
-    if (isZExt)
-      Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
-    else
-      Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
-    break;
-  case MVT::i8:
-    if (!Subtarget->hasV6Ops()) return 0;
-    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
-    if (isZExt)
-      Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
-    else
-      Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
-    break;
-  case MVT::i1:
-    if (isZExt) {
-      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
-      Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
-      isBoolZext = true;
-      break;
+  // Table of which combinations can be emitted as a single instruction,
+  // and which will require two.
+  static const uint8_t isSingleInstrTbl[3][2][2][2] = {
+    //               ARM                  Thumb
+    //           !hasV6Ops  hasV6Ops  !hasV6Ops  hasV6Ops
+    // ext:      s  z       s  z      s  z       s  z
+    /*  1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
+    /*  8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
+    /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
+  };
+
+  // Target registers for:
+  //  - For ARM can never be PC.
+  //  - For 16-bit Thumb are restricted to lower 8 registers.
+  //  - For 32-bit Thumb are restricted to non-SP and non-PC.
+  static const TargetRegisterClass *RCTbl[2][2] = {
+    // Instructions: Two                     Single
+    /* ARM   */    { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
+    /* Thumb */    { &ARM::tGPRRegClass,    &ARM::rGPRRegClass    }
+  };
+
+  // Table governing the instruction(s) to be emitted.
+  static const struct {
+    // First entry for each of the following is sext, second zext.
+    uint16_t Opc[2];
+    uint8_t Imm[2];   // All instructions have either a shift or a mask.
+    uint8_t hasS[2];  // Some instructions have an S bit, always set it to 0.
+  } OpcTbl[2][2][3] = {
+    { // Two instructions (first is left shift, second is in this table).
+      { // ARM
+        /*  1 */ { { ARM::ASRi,   ARM::LSRi   }, { 31, 31 }, { 1, 1 } },
+        /*  8 */ { { ARM::ASRi,   ARM::LSRi   }, { 24, 24 }, { 1, 1 } },
+        /* 16 */ { { ARM::ASRi,   ARM::LSRi   }, { 16, 16 }, { 1, 1 } }
+      },
+      { // Thumb
+        /*  1 */ { { ARM::tASRri, ARM::tLSRri }, { 31, 31 }, { 0, 0 } },
+        /*  8 */ { { ARM::tASRri, ARM::tLSRri }, { 24, 24 }, { 0, 0 } },
+        /* 16 */ { { ARM::tASRri, ARM::tLSRri }, { 16, 16 }, { 0, 0 } }
+      }
+    },
+    { // Single instruction.
+      { // ARM
+        /*  1 */ { { ARM::KILL,   ARM::ANDri   }, { 0,   1 }, { 0, 1 } },
+        /*  8 */ { { ARM::SXTB,   ARM::ANDri   }, { 0, 255 }, { 0, 1 } },
+        /* 16 */ { { ARM::SXTH,   ARM::UXTH    }, { 0,   0 }, { 0, 0 } }
+      },
+      { // Thumb
+        /*  1 */ { { ARM::KILL,   ARM::t2ANDri }, { 0,   1 }, { 0, 1 } },
+        /*  8 */ { { ARM::t2SXTB, ARM::t2ANDri }, { 0, 255 }, { 0, 1 } },
+        /* 16 */ { { ARM::t2SXTH, ARM::t2UXTH  }, { 0,   0 }, { 0, 0 } }
+      }
     }
-    return 0;
+  };
+
+  unsigned SrcBits = SrcVT.getSizeInBits();
+  unsigned DestBits = DestVT.getSizeInBits();
+  (void) DestBits;
+  assert((SrcBits < DestBits) && "can only extend to larger types");
+  assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
+         "other sizes unimplemented");
+  assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
+         "other sizes unimplemented");
+
+  bool hasV6Ops = Subtarget->hasV6Ops();
+  unsigned Bitness = countTrailingZeros(SrcBits) >> 1;  // {1,8,16}=>{0,1,2}
+  assert((Bitness < 3) && "sanity-check table bounds");
+
+  bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
+  const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
+  unsigned Opc = OpcTbl[isSingleInstr][isThumb2][Bitness].Opc[isZExt];
+  assert(ARM::KILL != Opc && "Invalid table entry");
+  unsigned Imm = OpcTbl[isSingleInstr][isThumb2][Bitness].Imm[isZExt];
+  unsigned hasS = OpcTbl[isSingleInstr][isThumb2][Bitness].hasS[isZExt];
+
+  // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
+  bool setsCPSR = &ARM::tGPRRegClass == RC;
+  unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::LSLi;
+  unsigned ResultReg;
+
+  // Either one or two instructions are emitted.
+  // They're always of the form:
+  //   dst = in OP imm
+  // CPSR is set only by 16-bit Thumb instructions.
+  // Predicate, if any, is AL.
+  // S bit, if available, is always 0.
+  // When two are emitted the first's result will feed as the second's input,
+  // that value is then dead.
+  unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
+  for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
+    ResultReg = createResultReg(RC);
+    unsigned Opcode = ((0 == Instr) && !isSingleInstr) ? LSLOpc : Opc;
+    bool isKill = 1 == Instr;
+    MachineInstrBuilder MIB = BuildMI(
+        *FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opcode), ResultReg);
+    if (setsCPSR)
+      MIB.addReg(ARM::CPSR, RegState::Define);
+    AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(Imm));
+    if (hasS)
+      AddDefaultCC(MIB);
+    // Second instruction consumes the first's result.
+    SrcReg = ResultReg;
   }
 
-  unsigned ResultReg = createResultReg(RC);
-  MachineInstrBuilder MIB;
-  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
-        .addReg(SrcReg);
-  if (isBoolZext)
-    MIB.addImm(1);
-  else
-    MIB.addImm(0);
-  AddOptionalDefs(MIB);
   return ResultReg;
 }
@@ -2707,7 +2774,7 @@ bool ARMFastISel::SelectShift(const Instruction *I,
     if (Reg2 == 0) return false;
   }
 
-  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+  unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
   if(ResultReg == 0) return false;
 
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -2797,6 +2864,25 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
   return false;
 }
 
+namespace {
+// This table describes sign- and zero-extend instructions which can be
+// folded into a preceding load. All of these extends have an immediate
+// (sometimes a mask and sometimes a shift) that's applied after
+// extension.
+const struct FoldableLoadExtendsStruct {
+  uint16_t Opc[2];  // ARM, Thumb.
+  uint8_t ExpectedImm;
+  uint8_t isZExt     : 1;
+  uint8_t ExpectedVT : 7;
+} FoldableLoadExtends[] = {
+  { { ARM::SXTH,  ARM::t2SXTH  },   0, 0, MVT::i16 },
+  { { ARM::UXTH,  ARM::t2UXTH  },   0, 1, MVT::i16 },
+  { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8  },
+  { { ARM::SXTB,  ARM::t2SXTB  },   0, 0, MVT::i8  },
+  { { ARM::UXTB,  ARM::t2UXTB  },   0, 1, MVT::i8  }
+};
+}
+
 /// \brief The specified machine instr operand is a vreg, and that
 /// vreg is being provided by the specified load instruction. If possible,
 /// try to fold the load as an operand to the instruction, returning true if
@@ -2812,26 +2898,23 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
   // ldrb r1, [r0]       ldrb r1, [r0]
   // uxtb r2, r1     =>
   // mov  r3, r2         mov  r3, r1
-  bool isZExt = true;
-  switch(MI->getOpcode()) {
-    default: return false;
-    case ARM::SXTH:
-    case ARM::t2SXTH:
-      isZExt = false;
-    case ARM::UXTH:
-    case ARM::t2UXTH:
-      if (VT != MVT::i16)
-        return false;
-      break;
-    case ARM::SXTB:
-    case ARM::t2SXTB:
-      isZExt = false;
-    case ARM::UXTB:
-    case ARM::t2UXTB:
-      if (VT != MVT::i8)
-        return false;
-      break;
+  if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
+    return false;
+  const uint64_t Imm = MI->getOperand(2).getImm();
+
+  bool Found = false;
+  bool isZExt;
+  for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends);
+       i != e; ++i) {
+    if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() &&
+        (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm &&
+        MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) {
+      Found = true;
+      isZExt = FoldableLoadExtends[i].isZExt;
+    }
   }
+  if (!Found) return false;
+
   // See if we can handle this address.
   Address Addr;
   if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 483802b130..c8637be2bd 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -141,7 +141,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
   assert(!AFI->isThumb1OnlyFunction() &&
          "This emitPrologue does not support Thumb1!");
   bool isARM = !AFI->isThumbFunction();
-  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+  unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
   unsigned NumBytes = MFI->getStackSize();
   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
   DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -357,7 +358,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
          "This emitEpilogue does not support Thumb1!");
   bool isARM = !AFI->isThumbFunction();
 
-  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+  unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
   int NumBytes = (int)MFI->getStackSize();
   unsigned FramePtr = RegInfo->getFrameRegister(MF);
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 1240169e84..c69d313fd9 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -44,10 +44,16 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
     MachineInstr *DefMI = LastMI;
     const MCInstrDesc &LastMCID = LastMI->getDesc();
+    const TargetMachine &TM =
+      MI->getParent()->getParent()->getTarget();
+    const ARMBaseInstrInfo &TII =
+      *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+
     // Skip over one non-VFP / NEON instruction.
     if (!LastMI->isBarrier() &&
         // On A9, AGU and NEON/FPU are muxed.
-        !(STI.isLikeA9() && (LastMI->mayLoad() || LastMI->mayStore())) &&
+        !(TII.getSubtarget().isLikeA9() &&
+          (LastMI->mayLoad() || LastMI->mayStore())) &&
         (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
       MachineBasicBlock::iterator I = LastMI;
       if (I != LastMI->getParent()->begin()) {
@@ -58,7 +64,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
 
     if (TII.isFpMLxInstruction(DefMI->getOpcode()) &&
         (TII.canCauseFpMLxStall(MI->getOpcode()) ||
-         hasRAWHazard(DefMI, MI, TRI))) {
+         hasRAWHazard(DefMI, MI, TII.getRegisterInfo()))) {
       // Try to schedule another instruction for the next 4 cycles.
      if (FpMLxStalls == 0)
         FpMLxStalls = 4;
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
index 98bfc4cf0c..e1dcec3d1c 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -28,21 +28,14 @@ class MachineInstr;
 
 /// ARM preRA scheduler uses an unspecialized instance of the
 /// ScoreboardHazardRecognizer.
 class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
-  const ARMBaseInstrInfo &TII;
-  const ARMBaseRegisterInfo &TRI;
-  const ARMSubtarget &STI;
-
   MachineInstr *LastMI;
   unsigned FpMLxStalls;
 
 public:
   ARMHazardRecognizer(const InstrItineraryData *ItinData,
-                      const ARMBaseInstrInfo &tii,
-                      const ARMBaseRegisterInfo &tri,
-                      const ARMSubtarget &sti,
-                      const ScheduleDAG *DAG) :
-    ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
-    TRI(tri), STI(sti), LastMI(0) {}
+                      const ScheduleDAG *DAG)
+    : ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"),
+      LastMI(0) {}
 
   virtual HazardType getHazardType(SUnit *SU, int Stalls);
   virtual void Reset();
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9e1782e119..962368d07f 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -364,7 +364,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
       continue;
 
     // Check if the AND mask is an immediate of the form: 000.....1111111100
-    unsigned TZ = CountTrailingZeros_32(And_imm);
+    unsigned TZ = countTrailingZeros(And_imm);
     if (TZ != 1 && TZ != 2)
       // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
@@ -402,12 +402,12 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
     }
 
     // Now make the transformation.
-    Srl = CurDAG->getNode(ISD::SRL, Srl.getDebugLoc(), MVT::i32,
+    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                           Srl.getOperand(0),
                           CurDAG->getConstant(Srl_imm+TZ, MVT::i32));
-    N1 = CurDAG->getNode(ISD::AND, N1.getDebugLoc(), MVT::i32,
+    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                          Srl, CurDAG->getConstant(And_imm, MVT::i32));
-    N1 = CurDAG->getNode(ISD::SHL, N1.getDebugLoc(), MVT::i32,
+    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                          N1, CurDAG->getConstant(TZ, MVT::i32));
     CurDAG->UpdateNodeOperands(N, N0, N1);
   }
@@ -533,7 +533,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
   if (N.getOpcode() == ISD::FrameIndex) {
     // Match frame index.
     int FI = cast<FrameIndexSDNode>(N)->getIndex();
-    Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
     OffImm = CurDAG->getTargetConstant(0, MVT::i32);
     return true;
   }
@@ -557,7 +557,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
       Base = N.getOperand(0);
       if (Base.getOpcode() == ISD::FrameIndex) {
         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-        Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+        Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
       }
       OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
       return true;
@@ -703,7 +703,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
     Base = N;
     if (N.getOpcode() == ISD::FrameIndex) {
       int FI = cast<FrameIndexSDNode>(N)->getIndex();
-      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
     } else if (N.getOpcode() == ARMISD::Wrapper &&
                !(Subtarget->useMovt() &&
                  N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
@@ -724,7 +724,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
       Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-        Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+        Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
       }
       Offset = CurDAG->getRegister(0, MVT::i32);
@@ -901,7 +901,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
     Base = N;
     if (N.getOpcode() == ISD::FrameIndex) {
       int FI = cast<FrameIndexSDNode>(N)->getIndex();
-      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
     }
     Offset = CurDAG->getRegister(0, MVT::i32);
     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
@@ -915,7 +915,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
       Base = N.getOperand(0);
       if (Base.getOpcode() == ISD::FrameIndex) {
         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-        Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+        Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
       }
       Offset = CurDAG->getRegister(0, MVT::i32);
@@ -960,7 +960,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
     Base = N;
     if (N.getOpcode() == ISD::FrameIndex) {
       int FI = cast<FrameIndexSDNode>(N)->getIndex();
-      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
     } else if (N.getOpcode() == ARMISD::Wrapper &&
                !(Subtarget->useMovt() &&
                  N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
@@ -978,7 +978,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
       Base = N.getOperand(0);
       if (Base.getOpcode() == ISD::FrameIndex) {
         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-        Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+        Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
       }
 
       ARM_AM::AddrOpc AddSub = ARM_AM::add;
@@ -1202,7 +1202,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                             SDValue &Base, SDValue &OffImm) {
   if (N.getOpcode() == ISD::FrameIndex) {
     int FI = cast<FrameIndexSDNode>(N)->getIndex();
-    Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
     OffImm = CurDAG->getTargetConstant(0, MVT::i32);
     return true;
   }
@@ -1219,7 +1219,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
       Base = N.getOperand(0);
       if (Base.getOpcode() == ISD::FrameIndex) {
         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-        Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+        Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
       }
       OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
       return true;
@@ -1267,7 +1267,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
   if (N.getOpcode() == ISD::FrameIndex) {
     // Match frame index.
     int FI = cast<FrameIndexSDNode>(N)->getIndex();
-    Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
     OffImm = CurDAG->getTargetConstant(0, MVT::i32);
     return true;
   }
@@ -1297,7 +1297,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
       Base = N.getOperand(0);
       if (Base.getOpcode() == ISD::FrameIndex) {
         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-        Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+        Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
       }
       OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
       return true;
@@ -1326,7 +1326,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
       Base = N.getOperand(0);
       if (Base.getOpcode() == ISD::FrameIndex) {
         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-        Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+        Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
       }
       OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
       return true;
@@ -1468,14 +1468,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
       SDValue Base = LD->getBasePtr();
       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG),
                        CurDAG->getRegister(0, MVT::i32), Chain };
-      return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
+      return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                     MVT::i32, MVT::Other, Ops);
     } else {
       SDValue Chain = LD->getChain();
       SDValue Base = LD->getBasePtr();
       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
                        CurDAG->getRegister(0, MVT::i32), Chain };
-      return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
+      return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                     MVT::i32, MVT::Other, Ops);
     }
   }
@@ -1524,7 +1524,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
     SDValue Base = LD->getBasePtr();
     SDValue Ops[]= { Base, Offset, getAL(CurDAG),
                      CurDAG->getRegister(0, MVT::i32), Chain };
-    return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
+    return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                   MVT::Other, Ops);
   }
 
@@ -1533,7 +1533,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
 
 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
-  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDLoc dl(V0.getNode());
   SDValue RegClass =
     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32);
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
@@ -1544,7 +1544,7 @@ SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
 
 /// \brief Form a D register from a pair of S registers.
 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
-  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDLoc dl(V0.getNode());
   SDValue RegClass =
     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
@@ -1555,7 +1555,7 @@ SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
 
 /// \brief Form a quad register from a pair of D registers.
 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
-  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDLoc dl(V0.getNode());
   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
@@ -1565,7 +1565,7 @@ SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
 
 /// \brief Form 4 consecutive D registers from a pair of Q registers.
 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
-  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDLoc dl(V0.getNode());
   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
@@ -1576,7 +1576,7 @@ SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
 /// \brief Form 4 consecutive S registers.
 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                                    SDValue V2, SDValue V3) {
-  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDLoc dl(V0.getNode());
   SDValue RegClass =
     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32);
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
@@ -1591,7 +1591,7 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
 /// \brief Form 4 consecutive D registers.
 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                                    SDValue V2, SDValue V3) {
-  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDLoc dl(V0.getNode());
   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
@@ -1605,7 +1605,7 @@ SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
 /// \brief Form 4 consecutive Q registers.
 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                                    SDValue V2, SDValue V3) {
-  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDLoc dl(V0.getNode());
   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
@@ -1689,7 +1689,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                    const uint16_t *QOpcodes0,
                                    const uint16_t *QOpcodes1) {
   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
 
   SDValue MemAddr, Align;
   unsigned AddrOpIdx = isUpdating ? 1 : 2;
@@ -1821,7 +1821,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                    const uint16_t *QOpcodes0,
                                    const uint16_t *QOpcodes1) {
   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
 
   SDValue MemAddr, Align;
   unsigned AddrOpIdx = isUpdating ? 1 : 2;
@@ -1966,7 +1966,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
                                          const uint16_t *DOpcodes,
                                          const uint16_t *QOpcodes) {
   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
 
   SDValue MemAddr, Align;
   unsigned AddrOpIdx = isUpdating ? 1 : 2;
@@ -2084,7 +2084,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
                                       unsigned NumVecs,
                                       const uint16_t *Opcodes) {
   assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
 
   SDValue MemAddr, Align;
   if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
@@ -2166,7 +2166,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
 SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
                                     unsigned Opc) {
   assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   EVT VT = N->getValueType(0);
   unsigned FirstTblReg = IsExt ? 2 : 1;
@@ -2536,7 +2536,7 @@ SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
   Ops.push_back(Node->getOperand(0)); // Chain
   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
-  SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+  SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node),
                                            MVT::i32, MVT::i32, MVT::Other,
                                            Ops);
   cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
@@ -2544,7 +2544,7 @@ SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
 }
 
 SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
 
   if (N->isMachineOpcode())
     return NULL;   // Already selected.
@@ -2587,7 +2587,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       SDValue CPIdx =
        CurDAG->getTargetConstantPool(ConstantInt::get(
                                  Type::getInt32Ty(*CurDAG->getContext()), Val),
-                                      TLI.getPointerTy());
+                                      TLI->getPointerTy());
 
       SDNode *ResNode;
       if (Subtarget->isThumb1Only()) {
@@ -2617,7 +2617,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   case ISD::FrameIndex: {
     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
     int FI = cast<FrameIndexSDNode>(N)->getIndex();
-    SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+    SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
     if (Subtarget->isThumb1Only()) {
       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
                         getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
@@ -3121,7 +3121,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
 
     case Intrinsic::arm_ldrexd: {
       SDValue MemAddr = N->getOperand(2);
-      DebugLoc dl = N->getDebugLoc();
+      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
 
       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
@@ -3179,7 +3179,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     }
 
     case Intrinsic::arm_strexd: {
-      DebugLoc dl = N->getDebugLoc();
+      SDLoc dl(N);
       SDValue Chain = N->getOperand(0);
       SDValue Val0 = N->getOperand(2);
       SDValue Val1 = N->getOperand(3);
@@ -3383,7 +3383,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   }
 
   case ARMISD::VTBL1: {
-    DebugLoc dl = N->getDebugLoc();
+    SDLoc dl(N);
     EVT VT = N->getValueType(0);
     SmallVector<SDValue, 6> Ops;
@@ -3394,7 +3394,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
   }
   case ARMISD::VTBL2: {
-    DebugLoc dl = N->getDebugLoc();
+    SDLoc dl(N);
     EVT VT = N->getValueType(0);
 
     // Form a REG_SEQUENCE to force register allocation.
@@ -3462,7 +3462,7 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
   if (AsmString.find(":H}") == StringRef::npos)
     return NULL;
 
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   SDValue Glue = N->getOperand(NumOps-1);
 
   // Glue node will be appended late.
@@ -3567,7 +3567,7 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
   if (!Changed)
     return NULL;
 
-  SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
+  SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
                   CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0],
                                 AsmNodeOperands.size());
   New->setNodeId(-1);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 9475f1b5a0..ec0e9c2b54 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -681,6 +681,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i32  , Expand);
   setOperationAction(ISD::CTLZ_ZERO_UNDEF  , MVT::i32  , Expand);
 
+  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+
   // Only ARMv6 has BSWAP.
   if (!Subtarget->hasV6Ops())
     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -1069,7 +1071,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   }
 }
 
-EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
+EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
   if (!VT.isVector()) return getPointerTy();
   return VT.changeVectorElementTypeToInteger();
 }
@@ -1233,7 +1235,7 @@ SDValue
 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
-                                   DebugLoc dl, SelectionDAG &DAG,
+                                   SDLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals,
                                    bool isThisReturn, SDValue ThisVal) const {
 
@@ -1314,7 +1316,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
 SDValue
 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                     SDValue StackPtr, SDValue Arg,
-                                    DebugLoc dl, SelectionDAG &DAG,
+                                    SDLoc dl, SelectionDAG &DAG,
                                     const CCValAssign &VA,
                                     ISD::ArgFlagsTy Flags) const {
   unsigned LocMemOffset = VA.getLocMemOffset();
@@ -1325,7 +1327,7 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                       false, false, 0);
 }
 
-void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
                                          SDValue Chain, SDValue &Arg,
                                          RegsToPassVector &RegsToPass,
                                          CCValAssign &VA, CCValAssign &NextVA,
@@ -1357,7 +1359,7 @@ SDValue
 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                              SmallVectorImpl<SDValue> &InVals) const {
   SelectionDAG &DAG                     = CLI.DAG;
-  DebugLoc &dl                          = CLI.DL;
+  SDLoc &dl                             = CLI.DL;
   SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
   SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
   SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
@@ -1406,7 +1408,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   // Adjust the stack pointer for the new arguments...
   // These operations are automatically eliminated by the prolog/epilog pass
   if (!isSibCall)
-    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                                 dl);
 
   SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
@@ -1481,10 +1484,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       // True if this byval aggregate will be split between registers
       // and memory.
-      if (CCInfo.isFirstByValRegValid()) {
+      unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
+      unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
+
+      if (CurByValIdx < ByValArgsCount) {
+
+        unsigned RegBegin, RegEnd;
+        CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
+
         EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
         unsigned int i, j;
-        for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
+        for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
           SDValue Const = DAG.getConstant(4*i, MVT::i32);
           SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
@@ -1493,11 +1503,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
           MemOpChains.push_back(Load.getValue(1));
           RegsToPass.push_back(std::make_pair(j, Load));
         }
-        offset = ARM::R4 - CCInfo.getFirstByValReg();
-        CCInfo.clearFirstByValReg();
+
+        // If parameter size outsides register area, "offset" value
+        // helps us to calculate stack slot for remained part properly.
+        offset = RegEnd - RegBegin;
+
+        CCInfo.nextInRegsParam();
       }
 
-      if (Flags.getByValSize() - 4*offset > 0) {
+      if (Flags.getByValSize() > 4*offset) {
         unsigned LocMemOffset = VA.getLocMemOffset();
         SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
         SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
@@ -1718,7 +1732,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     InFlag = Chain.getValue(1);
 
   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
-                             DAG.getIntPtrConstant(0, true), InFlag);
+                             DAG.getIntPtrConstant(0, true), InFlag, dl);
   if (!Ins.empty())
     InFlag = Chain.getValue(1);
@@ -1740,9 +1754,24 @@ ARMTargetLowering::HandleByVal(
   assert((State->getCallOrPrologue() == Prologue ||
           State->getCallOrPrologue() == Call) &&
          "unhandled ParmContext");
-  if ((!State->isFirstByValRegValid()) &&
-      (!Subtarget->isAAPCS_ABI() || State->getNextStackOffset() == 0) &&
-      (ARM::R0 <= reg) && (reg <= ARM::R3)) {
+
+  // For in-prologue parameters handling, we also introduce stack offset
+  // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal.
+  // This behaviour outsides AAPCS rules (5.5 Parameters Passing) of how
+  // NSAA should be evaluted (NSAA means "next stacked argument address").
+  // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs.
+  // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs.
+  unsigned NSAAOffset = State->getNextStackOffset();
+  if (State->getCallOrPrologue() != Call) {
+    for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) {
+      unsigned RB, RE;
+      State->getInRegsParamInfo(i, RB, RE);
+      assert(NSAAOffset >= (RE-RB)*4 &&
+             "Stack offset for byval regs doesn't introduced anymore?");
+      NSAAOffset -= (RE-RB)*4;
+    }
+  }
+  if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
     if (Subtarget->isAAPCS_ABI() && Align > 4) {
       unsigned AlignInRegs = Align / 4;
       unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
@@ -1750,22 +1779,45 @@ ARMTargetLowering::HandleByVal(
         reg = State->AllocateReg(GPRArgRegs, 4);
     }
     if (reg != 0) {
-      State->setFirstByValReg(reg);
+      unsigned excess = 4 * (ARM::R4 - reg);
+
+      // Special case when NSAA != SP and parameter size greater than size of
+      // all remained GPR regs. In that case we can't split parameter, we must
+      // send it to stack. We also must set NCRN to R4, so waste all
+      // remained registers.
+      if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
+        while (State->AllocateReg(GPRArgRegs, 4))
+          ;
+        return;
+      }
+
+      // First register for byval parameter is the first register that wasn't
+      // allocated before this method call, so it would be "reg".
+      // If parameter is small enough to be saved in range [reg, r4), then
+      // the end (first after last) register would be reg + param-size-in-regs,
+      // else parameter would be splitted between registers and stack,
+      // end register would be r4 in this case.
+      unsigned ByValRegBegin = reg;
+      unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
+      State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
+      // Note, first register is allocated in the beginning of function already,
+      // allocate remained amount of registers we need.
+      for (unsigned i = reg+1; i != ByValRegEnd; ++i)
+        State->AllocateReg(GPRArgRegs, 4);
       // At a call site, a byval parameter that is split between
       // registers and memory needs its size truncated here.  In a
       // function prologue, such byval parameters are reassembled in
       // memory, and are not truncated.
       if (State->getCallOrPrologue() == Call) {
-        unsigned excess = 4 * (ARM::R4 - reg);
-        assert(size >= excess && "expected larger existing stack allocation");
-        size -= excess;
+        // Make remained size equal to 0 in case, when
+        // the whole structure may be stored into registers.
+        if (size < excess)
+          size = 0;
+        else
+          size -= excess;
       }
     }
   }
-  // Confiscate any remaining parameter registers to preclude their
-  // assignment to subsequent parameters.
-  while (State->AllocateReg(GPRArgRegs, 4))
-    ;
 }
 
 /// MatchingStackOffset - Return true if the given stack call argument is
@@ -1970,7 +2022,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
                                CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                                const SmallVectorImpl<SDValue> &OutVals,
-                               DebugLoc dl, SelectionDAG &DAG) const {
+                               SDLoc dl, SelectionDAG &DAG) const {
 
   // CCValAssign - represent the assignment of the return value to a location.
   SmallVector<CCValAssign, 16> RVLocs;
@@ -2098,7 +2150,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
       Copy = *Copy->use_begin();
     if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
       return false;
-    Chain = Copy->getOperand(0);
+    TCChain = Copy->getOperand(0);
   } else {
     return false;
   }
@@ -2137,7 +2189,7 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
   EVT PtrVT = Op.getValueType();
   // FIXME there is no actual debug info here
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
   SDValue Res;
   if (CP->isMachineConstantPoolEntry())
@@ -2158,7 +2210,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
   MachineFunction &MF = DAG.getMachineFunction();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   unsigned ARMPCLabelIndex = 0;
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
   EVT PtrVT = getPointerTy();
   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
@@ -2187,7 +2239,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
 SDValue
 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
                                                  SelectionDAG &DAG) const {
-  DebugLoc dl = GA->getDebugLoc();
+  SDLoc dl(GA);
   EVT PtrVT = getPointerTy();
   unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
   MachineFunction &MF = DAG.getMachineFunction();
@@ -2230,7 +2282,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                         SelectionDAG &DAG,
                                         TLSModel::Model model) const {
   const GlobalValue *GV = GA->getGlobal();
-  DebugLoc dl = GA->getDebugLoc();
+  SDLoc dl(GA);
   SDValue Offset;
   SDValue Chain = DAG.getEntryNode();
   EVT PtrVT = getPointerTy();
@@ -2300,7 +2352,7 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
                                                  SelectionDAG &DAG) const {
   EVT PtrVT = getPointerTy();
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
     bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
@@ -2343,7 +2395,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                                                     SelectionDAG &DAG) const {
   EVT PtrVT = getPointerTy();
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
@@ -2408,7 +2460,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   EVT PtrVT = getPointerTy();
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
   ARMConstantPoolValue *CPV =
     ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
@@ -2424,7 +2476,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
 SDValue
 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   SDValue Val = DAG.getConstant(0, MVT::i32);
   return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
                      DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
@@ -2433,7 +2485,7 @@ ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
 SDValue
 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
                      Op.getOperand(1), DAG.getConstant(0, MVT::i32));
 }
@@ -2442,7 +2494,7 @@ SDValue
 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                            const ARMSubtarget *Subtarget) const {
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   switch (IntNo) {
   default: return SDValue();    // Don't custom lower most intrinsics.
   case Intrinsic::arm_thread_pointer: {
@@ -2478,7 +2530,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
   case Intrinsic::arm_neon_vmullu: {
     unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
       ? ARMISD::VMULLs : ARMISD::VMULLu;
-    return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
+    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
   }
   }
@@ -2487,7 +2539,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
                                  const ARMSubtarget *Subtarget) {
   // FIXME: handle "fence singlethread" more efficiently.
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   if (!Subtarget->hasDataBarrier()) {
     // Some ARMv6 cpus can support data barriers with an mcr instruction.
     // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
@@ -2510,7 +2562,7 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
     // Just preserve the chain.
     return Op.getOperand(0);
 
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
   if (!isRead &&
       (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
@@ -2535,7 +2587,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
   // vastart just stores the address of the VarArgsFrameIndex slot into the
   // memory location argument.
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
@@ -2546,7 +2598,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
 SDValue
 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &Root, SelectionDAG &DAG,
-                                        DebugLoc dl) const {
+                                        SDLoc dl) const {
   MachineFunction &MF = DAG.getMachineFunction();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -2580,13 +2632,17 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
 void
 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
+                                  unsigned InRegsParamRecordIdx,
+                                  unsigned ArgSize,
                                   unsigned &ArgRegsSize,
                                   unsigned &ArgRegsSaveSize)
   const {
   unsigned NumGPRs;
-  if (CCInfo.isFirstByValRegValid())
-    NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
-  else {
+  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+    unsigned RBegin, REnd;
+    CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+    NumGPRs = REnd - RBegin;
+  } else {
    unsigned int firstUnalloced;
     firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
                                                 sizeof(GPRArgRegs) /
@@ -2596,7 +2652,29 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
 
   unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
   ArgRegsSize = NumGPRs * 4;
-  ArgRegsSaveSize = (ArgRegsSize + Align - 1) & ~(Align - 1);
+
+  // If parameter is split between stack and GPRs...
+  if (NumGPRs && Align == 8 &&
+      (ArgRegsSize < ArgSize ||
+        InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
+    // Add padding for part of param recovered from GPRs, so
+    // its last byte must be at address K*8 - 1.
+    // We need to do it, since remained (stack) part of parameter has
+    // stack alignment, and we need to "attach" "GPRs head" without gaps
+    // to it:
+    // Stack:
+    // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
+    // [ [padding] [GPRs head] ] [        Tail passed via stack       ....
+    //
+    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+    unsigned Padding =
+        ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) -
+        (ArgRegsSize + AFI->getArgRegsSaveSize());
+    ArgRegsSaveSize = ArgRegsSize + Padding;
+  } else
+    // We don't need to extend regs save size for byval parameters if they
+    // are passed via GPRs only.
+    ArgRegsSaveSize = ArgRegsSize;
 }
 
 // The remaining GPRs hold either the beginning of variable-argument
@@ -2609,10 +2687,12 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
 // Return: The frame index registers were stored into.
 int
 ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
-                                  DebugLoc dl, SDValue &Chain,
+                                  SDLoc dl, SDValue &Chain,
                                   const Value *OrigArg,
+                                  unsigned InRegsParamRecordIdx,
                                   unsigned OffsetFromOrigArg,
                                   unsigned ArgOffset,
+                                  unsigned ArgSize,
                                   bool ForceMutable) const {
 
   // Currently, two use-cases possible:
@@ -2629,33 +2709,45 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  unsigned firstRegToSaveIndex;
-  if (CCInfo.isFirstByValRegValid())
-    firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
-  else {
+  unsigned firstRegToSaveIndex, lastRegToSaveIndex;
+  unsigned RBegin, REnd;
+  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+    CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+    firstRegToSaveIndex = RBegin - ARM::R0;
+    lastRegToSaveIndex = REnd - ARM::R0;
+  } else {
     firstRegToSaveIndex = CCInfo.getFirstUnallocated
       (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+    lastRegToSaveIndex = 4;
   }
 
   unsigned ArgRegsSize, ArgRegsSaveSize;
-  computeRegArea(CCInfo, MF, ArgRegsSize, ArgRegsSaveSize);
+  computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
+                 ArgRegsSize, ArgRegsSaveSize);
 
   // Store any by-val regs to their spots on the stack so that they may be
   // loaded by deferencing the result of formal parameter pointer or va_next.
   // Note: once stack area for byval/varargs registers
   // was initialized, it can't be initialized again.
-  if (!AFI->getArgRegsSaveSize() && ArgRegsSaveSize) {
+  if (ArgRegsSaveSize) {
+
+    unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
 
-    AFI->setArgRegsSaveSize(ArgRegsSaveSize);
+    if (Padding) {
+      assert(AFI->getStoredByValParamsPadding() == 0 &&
+             "The only parameter may be padded.");
+      AFI->setStoredByValParamsPadding(Padding);
+    }
 
     int FrameIndex = MFI->CreateFixedObject(
                       ArgRegsSaveSize,
-                      ArgOffset + ArgRegsSaveSize - ArgRegsSize,
+                      Padding + ArgOffset,
                       false);
     SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
 
     SmallVector<SDValue, 4> MemOps;
-    for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
+    for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
+         ++firstRegToSaveIndex, ++i) {
      const TargetRegisterClass *RC;
       if (AFI->isThumb1OnlyFunction())
         RC = &ARM::tGPRRegClass;
@@ -2672,19 +2764,23 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
       FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
                         DAG.getConstant(4, getPointerTy()));
     }
+
+    AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
+
     if (!MemOps.empty())
       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           &MemOps[0], MemOps.size());
     return FrameIndex;
   } else
     // This will point to the next argument passed via stack.
-    return MFI->CreateFixedObject(4, ArgOffset, !ForceMutable);
+    return MFI->CreateFixedObject(
+        4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable);
 }
 
 // Setup stack frame, the va_list pointer will start from.
 void
 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
-                                        DebugLoc dl, SDValue &Chain,
+                                        SDLoc dl, SDValue &Chain,
                                         unsigned ArgOffset,
                                         bool ForceMutable) const {
   MachineFunction &MF = DAG.getMachineFunction();
@@ -2696,7 +2792,8 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
   // If there is no regs to be stored, just point address after last
   // argument passed via stack.
int FrameIndex = - StoreByValRegs(CCInfo, DAG, dl, Chain, 0, 0, ArgOffset, ForceMutable); + StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), + 0, ArgOffset, 0, ForceMutable); AFI->setVarArgsFrameIndex(FrameIndex); } @@ -2706,7 +2803,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -2727,6 +2824,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SDValue ArgValue; Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; + + // Initially ArgRegsSaveSize is zero. + // Then we increase this value each time we meet byval parameter. + // We also increase this value in case of varargs function. + AFI->setArgRegsSaveSize(0); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); @@ -2824,15 +2927,21 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Since they could be overwritten by lowering of arguments in case of // a tail call. if (Flags.isByVal()) { + unsigned CurByValIndex = CCInfo.getInRegsParamsProceed(); int FrameIndex = StoreByValRegs( - CCInfo, DAG, dl, Chain, CurOrigArg, - Ins[VA.getValNo()].PartOffset, - VA.getLocMemOffset(), - true /*force mutable frames*/); + CCInfo, DAG, dl, Chain, CurOrigArg, + CurByValIndex, + Ins[VA.getValNo()].PartOffset, + VA.getLocMemOffset(), + Flags.getByValSize(), + true /*force mutable frames*/); InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); + CCInfo.nextInRegsParam(); } else { + unsigned FIOffset = VA.getLocMemOffset() + + AFI->getStoredByValParamsPadding(); int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, - VA.getLocMemOffset(), true); + FIOffset, true); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -2874,7 +2983,7 @@ static bool isFloatingPointZero(SDValue Op) { SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, - DebugLoc dl) const { + SDLoc dl) const { if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { unsigned C = RHSC->getZExtValue(); if (!isLegalICmpImmediate(C)) { @@ -2932,7 +3041,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 
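
The DebugLoc-to-SDLoc substitution running through these hunks is mechanical: SDLoc is constructed directly from the SDValue or SDNode being lowered and carries both the source DebugLoc and the node's IR order. A simplified model of the shape of that change (not the real LLVM class, just an illustration of why call sites barely change):

    // Simplified model: SDLoc bundles a node's DebugLoc with its IR order.
    struct DebugLoc { /* source location */ };
    struct SDNode  { DebugLoc DL; unsigned IROrder; };

    struct SDLoc {
      DebugLoc DL;
      unsigned IROrder;
      explicit SDLoc(const SDNode *N) : DL(N->DL), IROrder(N->IROrder) {}
    };

    // Old style:  DebugLoc dl = Op.getDebugLoc();
    // New style:  SDLoc dl(Op);  -- same call sites, richer location info.
    int main() {
      SDNode N{DebugLoc{}, 42};
      SDLoc Loc(&N);
      return Loc.IROrder == 42 ? 0 : 1;
    }
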
SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, - DebugLoc dl) const { + SDLoc dl) const { SDValue Cmp; if (!isFloatingPointZero(RHS)) Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); @@ -2946,7 +3055,7 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, SDValue ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { unsigned Opc = Cmp.getOpcode(); - DebugLoc DL = Cmp.getDebugLoc(); + SDLoc DL(Cmp); if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); @@ -2966,7 +3075,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); SDValue SelectFalse = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Convert: // @@ -3021,7 +3130,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); SDValue TrueVal = Op.getOperand(2); SDValue FalseVal = Op.getOperand(3); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; @@ -3076,7 +3185,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { return DAG.getConstant(0, MVT::i32); if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) - return DAG.getLoad(MVT::i32, Op.getDebugLoc(), + return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), Ld->getAlignment()); @@ -3094,7 +3203,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { SDValue Ptr = Ld->getBasePtr(); - RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), + RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ptr, Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), @@ -3102,9 +3211,9 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, EVT PtrType = Ptr.getValueType(); unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); - SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(), + SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op), PtrType, Ptr, DAG.getConstant(4, PtrType)); - RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), + RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), NewPtr, Ld->getPointerInfo().getWithOffset(4), Ld->isVolatile(), Ld->isNonTemporal(), @@ -3124,7 +3233,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(2); SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); bool LHSSeenZero = false; bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); @@ -3174,7 +3283,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(2); SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; @@ -3215,7 +3324,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Table = Op.getOperand(1); SDValue Index = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT PTy = getPointerTy(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); @@ -3251,7 +3360,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { static 
SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (Op.getValueType().getVectorElementType() == MVT::i32) { if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) @@ -3273,7 +3382,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { if (VT.isVector()) return LowerVectorFP_TO_INT(Op, DAG); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned Opc; switch (Op.getOpcode()) { @@ -3291,7 +3400,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { if (VT.getVectorElementType() == MVT::f32) @@ -3327,7 +3436,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { if (VT.isVector()) return LowerVectorINT_TO_FP(Op, DAG); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned Opc; switch (Op.getOpcode()) { @@ -3348,7 +3457,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Implement fcopysign with a fabs and a conditional fneg. SDValue Tmp0 = Op.getOperand(0); SDValue Tmp1 = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT VT = Op.getValueType(); EVT SrcVT = Tmp1.getValueType(); bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || @@ -3432,7 +3541,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ MFI->setReturnAddressIsTaken(true); EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); @@ -3452,7 +3561,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful + SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) ? ARM::R7 : ARM::R11; @@ -3481,7 +3590,7 @@ static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) { SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits()) return SDValue(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits(); unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits(); unsigned NumElts = SrcVT.getVectorNumElements(); @@ -3512,7 +3621,7 @@ static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) { /// vectors), since the legalizer won't know what to do with that. static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Op = N->getOperand(0); // This function is only supposed to be called for i64 types, either as the @@ -3549,7 +3658,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { /// not support i64 elements, so sometimes the zero vectors will need to be /// explicitly constructed. Regardless, use a canonical VMOV to create the /// zero vector. 
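
LowerFCOPYSIGN above notes that the operation is implemented as a fabs plus a conditional fneg. As a scalar model, independent of the DAG code (a sketch, not the lowering itself):

    #include <cmath>
    #include <cstdio>

    // Scalar model of "fcopysign = fabs + conditional fneg":
    // take |Mag|, then negate iff the sign source is negative.
    static double copySignModel(double Mag, double Sgn) {
      double A = std::fabs(Mag);
      return std::signbit(Sgn) ? -A : A;
    }

    int main() {
      std::printf("%f\n", copySignModel(-3.5,  1.0));  //  3.5
      std::printf("%f\n", copySignModel( 3.5, -1.0));  // -3.5
    }
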
-static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { +static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) { assert(VT.isVector() && "Expected a vector type"); // The canonical modified immediate encoding of a zero vector is....0! SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); @@ -3565,7 +3674,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); @@ -3601,7 +3710,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); @@ -3634,7 +3743,7 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) // so that the shift + and get folded into a bitfield extract. - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, DAG.getConstant(Intrinsic::arm_get_fpscr, MVT::i32)); @@ -3649,7 +3758,7 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (!ST->hasV6T2Ops()) return SDValue(); @@ -3673,7 +3782,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, /// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits) static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8; SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0)); @@ -3695,7 +3804,7 @@ static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { /// v4i16:Extracted = [k0 k1 k2 k3 ] static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); SDValue BitCounts = getCTPOP16BitCounts(N, DAG); if (VT.is64BitVector()) { @@ -3730,7 +3839,7 @@ static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { /// static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); EVT VT16Bit = VT.is64BitVector() ? 
MVT::v4i16 : MVT::v8i16; @@ -3769,7 +3878,7 @@ static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (!VT.isVector()) return SDValue(); @@ -3804,7 +3913,7 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // We can get here for a node like i32 = ISD::SHL i32, i64 if (VT != MVT::i64) @@ -3850,7 +3959,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { SDValue CC = Op.getOperand(2); EVT VT = Op.getValueType(); ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (Op.getOperand(1).getValueType().isFloatingPoint()) { switch (SetCCOpcode) { @@ -4119,7 +4228,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, APFloat FPVal = CFP->getValueAPF(); int ImmVal = ARM_AM::getFP32Imm(FPVal); if (ImmVal != -1) { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, NewVal); @@ -4133,7 +4242,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false, VMOVModImm); if (NewVal != SDValue()) { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, NewVal); SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, @@ -4146,7 +4255,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false, VMVNModImm); if (NewVal != SDValue()) { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, VecConstant); @@ -4406,7 +4515,7 @@ static bool isReverseMask(ArrayRef<int> M, EVT VT) { // instruction, return an SDValue of such a constant (will become a MOV // instruction). Otherwise return null. static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, - const ARMSubtarget *ST, DebugLoc dl) { + const ARMSubtarget *ST, SDLoc dl) { uint64_t Val; if (!isa<ConstantSDNode>(N)) return SDValue(); @@ -4427,7 +4536,7 @@ static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const { BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT VT = Op.getValueType(); APInt SplatBits, SplatUndef; @@ -4617,7 +4726,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // shuffle in combination with VEXTs. SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); @@ -4806,7 +4915,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, /// the specified operations to build the shuffle. 
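
GeneratePerfectShuffle, which follows, unpacks a 32-bit perfect-shuffle table entry into an opcode and two 13-bit operand IDs; the field extraction is plain shifts and masks, sketched here in isolation:

    #include <cstdio>

    // Field layout used by GeneratePerfectShuffle below:
    //   bits 26..29: OpNum, bits 13..25: LHSID, bits 0..12: RHSID.
    static void decodePFEntry(unsigned PFEntry) {
      unsigned OpNum = (PFEntry >> 26) & 0x0F;
      unsigned LHSID = (PFEntry >> 13) & ((1u << 13) - 1);
      unsigned RHSID = (PFEntry >> 0)  & ((1u << 13) - 1);
      std::printf("op=%u lhs=%u rhs=%u\n", OpNum, LHSID, RHSID);
    }

    int main() { decodePFEntry((2u << 26) | (5u << 13) | 7u); }  // op=2 lhs=5 rhs=7
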
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, - DebugLoc dl) { + SDLoc dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); @@ -4886,7 +4995,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, // Check to see if we can use the VTBL instruction. SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SmallVector<SDValue, 8> VTBLMask; for (ArrayRef<int>::iterator @@ -4905,7 +5014,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, SelectionDAG &DAG) { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue OpLHS = Op.getOperand(0); EVT VT = OpLHS.getValueType(); @@ -4923,7 +5032,7 @@ static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT VT = Op.getValueType(); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); @@ -5087,7 +5196,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue Vec = Op.getOperand(0); if (Op.getValueType() == MVT::i32 && Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); } @@ -5099,7 +5208,7 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { // two 64-bit vectors are concatenated to a 128-bit vector. assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && "unexpected CONCAT_VECTORS"); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue Val = DAG.getUNDEF(MVT::v2f64); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -5188,6 +5297,23 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { return false; } +static EVT getExtensionTo64Bits(const EVT &OrigVT) { + if (OrigVT.getSizeInBits() >= 64) + return OrigVT; + + assert(OrigVT.isSimple() && "Expecting a simple value type"); + + MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; + switch (OrigSimpleTy) { + default: llvm_unreachable("Unexpected Vector Type"); + case MVT::v2i8: + case MVT::v2i16: + return MVT::v2i32; + case MVT::v4i8: + return MVT::v4i16; + } +} + /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. /// We insert the required extension here to get the vector to fill a D register. @@ -5203,19 +5329,9 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, return N; // Must extend size to at least 64 bits to be used as an operand for VMULL. 
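
The getExtensionTo64Bits helper introduced above widens sub-64-bit vector types until they fill a D register, as VMULL requires. The same mapping expressed on element counts and widths rather than MVTs (a sketch under that assumption):

    #include <cassert>
    #include <cstdio>

    // Mirror of the getExtensionTo64Bits mapping: keep the element count,
    // widen the elements until the whole vector reaches 64 bits.
    static unsigned widenedEltBits(unsigned NumElts, unsigned EltBits) {
      unsigned TotalBits = NumElts * EltBits;
      if (TotalBits >= 64) return EltBits;  // already fills a D register
      return 64 / NumElts;                  // v2i8/v2i16 -> i32, v4i8 -> i16
    }

    int main() {
      assert(widenedEltBits(2, 8)  == 32);  // v2i8  -> v2i32
      assert(widenedEltBits(2, 16) == 32);  // v2i16 -> v2i32
      assert(widenedEltBits(4, 8)  == 16);  // v4i8  -> v4i16
      std::puts("mapping matches");
    }
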
- MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy; - EVT NewVT; - switch (OrigSimpleTy) { - default: llvm_unreachable("Unexpected Orig Vector Type"); - case MVT::v2i8: - case MVT::v2i16: - NewVT = MVT::v2i32; - break; - case MVT::v4i8: - NewVT = MVT::v4i16; - break; - } - return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N); + EVT NewVT = getExtensionTo64Bits(OrigTy); + + return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N); } /// SkipLoadExtensionForVMULL - return a load of the original vector size that @@ -5224,22 +5340,22 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, /// reach a total size of 64 bits. We have to add the extension separately /// because ARM does not have a sign/zero extending load for vectors. static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { - SDValue NonExtendingLoad = - DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), + EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT()); + + // The load already has the right type. + if (ExtendedTy == LD->getMemoryVT()) + return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment()); - unsigned ExtOp = 0; - switch (LD->getExtensionType()) { - default: llvm_unreachable("Unexpected LoadExtType"); - case ISD::EXTLOAD: - case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break; - } - MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy; - MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy; - return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG, - MemType, ExtType, ExtOp); + + // We need to create a zextload/sextload. We cannot just create a load + // followed by a zext/zext node because LowerMUL is also run during normal + // operation legalization where we can't create illegal types. + return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, + LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), + LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); } /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, @@ -5265,7 +5381,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { assert(BVN->getOpcode() == ISD::BUILD_VECTOR && BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32, + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32, BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); } // Construct a new BUILD_VECTOR with elements truncated to half the size. @@ -5282,7 +5398,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { // The values are implicitly truncated so sext vs. zext doesn't matter. Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); } - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); } @@ -5354,7 +5470,7 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { } // Legalize to a VMULL instruction. 
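
The SkipLoadExtensionForVMULL change above folds the extension into a sextload/zextload rather than emitting a load followed by an extend, since LowerMUL can run where the intermediate illegal type would be rejected. The reason VMULL wants widened operands at all is that it keeps the full product per lane; a scalar model of one VMULL.S16 lane:

    #include <cstdint>
    #include <cstdio>

    // One VMULL.S16 lane: widen both 16-bit inputs to 32 bits first,
    // so the full 32-bit product is preserved.
    static int32_t vmullLane(int16_t A, int16_t B) {
      return int32_t(A) * int32_t(B);
    }

    int main() {
      std::printf("%d\n", (int)vmullLane(-30000, 3));  // -90000, does not fit in i16
    }
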
- DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue Op0; SDValue Op1 = SkipExtensionForVMULL(N1, DAG); if (!isMLA) { @@ -5384,7 +5500,7 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { } static SDValue -LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { +LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) { // Convert to float // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); @@ -5413,7 +5529,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { } static SDValue -LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { +LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) { SDValue N2; // Convert to float. // float4 yf = vcvt_f32_s32(vmovl_s16(y)); @@ -5454,7 +5570,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::SDIV"); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; @@ -5489,7 +5605,7 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::UDIV"); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; @@ -5573,9 +5689,9 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { } if (!ExtraOp) - return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), + return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1)); - return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), + return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1), Op.getOperand(2)); } @@ -5589,11 +5705,10 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { return SDValue(); } - static void ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results, SelectionDAG &DAG, unsigned NewOp) { - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); assert (Node->getValueType(0) == MVT::i64 && "Only know how to expand i64 atomics"); @@ -5623,6 +5738,44 @@ ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results, Results.push_back(Result.getValue(2)); } +static void ReplaceREADCYCLECOUNTER(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + SDLoc DL(N); + SDValue Cycles32, OutChain; + + if (Subtarget->hasPerfMon()) { + // Under Power Management extensions, the cycle-count is: + // mrc p15, #0, <Rt>, c9, c13, #0 + SDValue Ops[] = { N->getOperand(0), // Chain + DAG.getConstant(Intrinsic::arm_mrc, MVT::i32), + DAG.getConstant(15, MVT::i32), + DAG.getConstant(0, MVT::i32), + DAG.getConstant(9, MVT::i32), + DAG.getConstant(13, MVT::i32), + DAG.getConstant(0, MVT::i32) + }; + + Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, + DAG.getVTList(MVT::i32, MVT::Other), &Ops[0], + array_lengthof(Ops)); + OutChain = Cycles32.getValue(1); + } else { + // Intrinsic is defined to return 0 on unsupported platforms. Technically + // there are older ARM CPUs that have implementation-specific ways of + // obtaining this information (FIXME!). 
+ Cycles32 = DAG.getConstant(0, MVT::i32); + OutChain = DAG.getEntryNode(); + } + + + SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, + Cycles32, DAG.getConstant(0, MVT::i32)); + Results.push_back(Cycles64); + Results.push_back(OutChain); +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); @@ -5700,6 +5853,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::SRA: Res = Expand64BitShift(N, DAG, Subtarget); break; + case ISD::READCYCLECOUNTER: + ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); + return; case ISD::ATOMIC_LOAD_ADD: ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG); return; @@ -7634,13 +7790,13 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, // Slct is now know to be the desired identity constant when CC is true. SDValue TrueVal = OtherOp; - SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, + SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal); // Unless SwapSelectOps says CC should be false. if (SwapSelectOps) std::swap(TrueVal, FalseVal); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SELECT, SDLoc(N), VT, CCOp, TrueVal, FalseVal); } @@ -7747,9 +7903,9 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, llvm_unreachable("Invalid vector element type for padd optimization."); } - SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), + SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, &Ops[0], Ops.size()); - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, tmp); } static SDValue findMUL_LOHI(SDValue V) { @@ -7874,7 +8030,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, Ops.push_back(*LowAdd); Ops.push_back(*HiAdd); - SDValue MLALNode = DAG.getNode(FinalOpc, AddcNode->getDebugLoc(), + SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode), DAG.getVTList(MVT::i32, MVT::i32), &Ops[0], Ops.size()); @@ -7982,7 +8138,7 @@ static SDValue PerformVMULCombine(SDNode *N, } EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); SDValue N00 = N0->getOperand(0); SDValue N01 = N0->getOperand(1); return DAG.getNode(Opcode, DL, VT, @@ -8012,11 +8168,11 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); int64_t MulAmt = C->getSExtValue(); - unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); + unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt); ShiftAmt = ShiftAmt & (32 - 1); SDValue V = N->getOperand(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); SDValue Res; MulAmt >>= ShiftAmt; @@ -8080,7 +8236,7 @@ static SDValue PerformANDCombine(SDNode *N, // Attempt to use immediate-form VBIC BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; @@ -8123,7 +8279,7 @@ static SDValue PerformORCombine(SDNode *N, const ARMSubtarget *Subtarget) { // Attempt to use immediate-form VORR BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; @@ -8198,7 +8354,7 @@ static SDValue PerformORCombine(SDNode *N, if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) return SDValue(); - DebugLoc DL = N->getDebugLoc(); 
+ SDLoc DL(N); // 1) or (and A, mask), val => ARMbfi A, val, mask // iff (val & mask) == val // @@ -8233,7 +8389,7 @@ static SDValue PerformORCombine(SDNode *N, return SDValue(); if (ARM::isBitFieldInvertedMask(Mask)) { - Val >>= CountTrailingZeros_32(~Mask); + Val >>= countTrailingZeros(~Mask); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, DAG.getConstant(Val, MVT::i32), @@ -8260,7 +8416,7 @@ static SDValue PerformORCombine(SDNode *N, (Mask == 0xffff || Mask == 0xffff0000)) return SDValue(); // 2a - unsigned amt = CountTrailingZeros_32(Mask2); + unsigned amt = countTrailingZeros(Mask2); Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), DAG.getConstant(amt, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, @@ -8276,7 +8432,7 @@ static SDValue PerformORCombine(SDNode *N, (Mask2 == 0xffff || Mask2 == 0xffff0000)) return SDValue(); // 2b - unsigned lsb = CountTrailingZeros_32(Mask); + unsigned lsb = countTrailingZeros(Mask); Res = DAG.getNode(ISD::SRL, DL, VT, N00, DAG.getConstant(lsb, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, @@ -8294,7 +8450,7 @@ static SDValue PerformORCombine(SDNode *N, // where lsb(mask) == #shamt and masked bits of B are known zero. SDValue ShAmt = N00.getOperand(1); unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue(); - unsigned LSB = CountTrailingZeros_32(Mask); + unsigned LSB = countTrailingZeros(Mask); if (ShAmtC != LSB) return SDValue(); @@ -8337,12 +8493,12 @@ static SDValue PerformBFICombine(SDNode *N, if (!N11C) return SDValue(); unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); - unsigned LSB = CountTrailingZeros_32(~InvMask); - unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB; + unsigned LSB = countTrailingZeros(~InvMask); + unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB; unsigned Mask = (1 << Width)-1; unsigned Mask2 = N11C->getZExtValue(); if ((Mask & (~Mask2)) == 0) - return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0), + return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0), N->getOperand(0), N1.getOperand(0), N->getOperand(2)); } @@ -8368,7 +8524,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, LoadSDNode *LD = cast<LoadSDNode>(InNode); SelectionDAG &DAG = DCI.DAG; - DebugLoc DL = LD->getDebugLoc(); + SDLoc DL(LD); SDValue BasePtr = LD->getBasePtr(); SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(), LD->isVolatile(), @@ -8405,7 +8561,7 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { if (Op0.getOpcode() == ARMISD::VMOVRRD && Op0.getNode() == Op1.getNode() && Op0.getResNo() == 0 && Op1.getResNo() == 1) - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0.getOperand(0)); return SDValue(); } @@ -8447,7 +8603,7 @@ static SDValue PerformSTORECombine(SDNode *N, NumElems*SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - DebugLoc DL = St->getDebugLoc(); + SDLoc DL(St); SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; @@ -8508,7 +8664,7 @@ static SDValue PerformSTORECombine(SDNode *N, if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; - DebugLoc DL = St->getDebugLoc(); + SDLoc DL(St); SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore(St->getChain(), DL, 
StVal.getNode()->getOperand(0), BasePtr, @@ -8530,14 +8686,14 @@ static SDValue PerformSTORECombine(SDNode *N, // Bitcast an i64 store extracted from a vector to f64. // Otherwise, the i64 value will be legalized to a pair of i32 values. SelectionDAG &DAG = DCI.DAG; - DebugLoc dl = StVal.getDebugLoc(); + SDLoc dl(StVal); SDValue IntVec = StVal.getOperand(0); EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, IntVec.getValueType().getVectorNumElements()); SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Vec, StVal.getOperand(1)); - dl = N->getDebugLoc(); + dl = SDLoc(N); SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); // Make the DAGCombiner fold the bitcasts. DCI.AddToWorklist(Vec.getNode()); @@ -8583,7 +8739,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, EVT VT = N->getValueType(0); if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N)) return SDValue(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SmallVector<SDValue, 8> Ops; unsigned NumElts = VT.getVectorNumElements(); for (unsigned i = 0; i < NumElts; ++i) { @@ -8610,7 +8766,7 @@ static SDValue PerformInsertEltCombine(SDNode *N, return SDValue(); SelectionDAG &DAG = DCI.DAG; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VT.getVectorNumElements()); SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0)); @@ -8656,7 +8812,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { !TLI.isTypeLegal(Concat1Op1.getValueType())) return SDValue(); - SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Op0.getOperand(0), Op1.getOperand(0)); // Translate the shuffle mask. SmallVector<int, 16> NewMask; @@ -8672,7 +8828,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { NewElt = HalfElts + MaskElt - NumElts; NewMask.push_back(NewElt); } - return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat, + return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat, DAG.getUNDEF(VT), NewMask.data()); } @@ -8789,7 +8945,7 @@ static SDValue CombineBaseUpdate(SDNode *N, Ops.push_back(N->getOperand(i)); } MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); - SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys, + SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops.data(), Ops.size(), MemInt->getMemoryVT(), MemInt->getMemOperand()); @@ -8863,7 +9019,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); - SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys, + SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, Ops, 2, VLDMemInt->getMemoryVT(), VLDMemInt->getMemOperand()); @@ -8918,7 +9074,7 @@ static SDValue PerformVDUPLANECombine(SDNode *N, if (EltSize > VT.getVectorElementType().getSizeInBits()) return SDValue(); - return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op); + return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); } // isConstVecPow2 - Return true if each vector element is a power of 2, all @@ -8977,7 +9133,7 @@ static SDValue PerformVCVTCombine(SDNode *N, unsigned IntrinsicOpcode = isSigned ? 
Intrinsic::arm_neon_vcvtfp2fxs : Intrinsic::arm_neon_vcvtfp2fxu; - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0), DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, DAG.getConstant(Log2_64(C), MVT::i32)); @@ -9013,7 +9169,7 @@ static SDValue PerformVDIVCombine(SDNode *N, unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp : Intrinsic::arm_neon_vcvtfxu2fp; - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), Op.getValueType(), DAG.getConstant(IntrinsicOpcode, MVT::i32), Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32)); @@ -9197,7 +9353,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { VShiftOpc = ARMISD::VQRSHRNsu; break; } - return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); } @@ -9214,7 +9370,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); } - return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), DAG.getConstant(Cnt, MVT::i32)); } @@ -9245,7 +9401,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && DAG.MaskedValueIsZero(N0.getOperand(0), APInt::getHighBitsSet(32, 16))) - return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1); } } @@ -9262,7 +9418,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, case ISD::SHL: if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) - return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), + return DAG.getNode(ARMISD::VSHL, SDLoc(N), VT, N->getOperand(0), DAG.getConstant(Cnt, MVT::i32)); break; @@ -9271,7 +9427,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? ARMISD::VSHRs : ARMISD::VSHRu); - return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), + return DAG.getNode(VShiftOpc, SDLoc(N), VT, N->getOperand(0), DAG.getConstant(Cnt, MVT::i32)); } } @@ -9311,7 +9467,7 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, Opc = ARMISD::VGETLANEu; break; } - return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); + return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane); } } @@ -9400,7 +9556,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, if (!Opcode) return SDValue(); - return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); + return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS); } /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. 
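
A little further on, ARM::isBitFieldInvertedMask drops its bit-scan loop for a leading/trailing-ones formulation: a value is an inverted bitfield mask iff stripping its trailing ones and its leading ones leaves zero. A standalone check with the same shape, using portable helpers in place of LLVM's MathExtras routines (a sketch, not the library code):

    #include <cassert>
    #include <cstdint>

    static unsigned countTrailingOnes(uint32_t V) {
      unsigned N = 0;
      while (V & 1) { V >>= 1; ++N; }
      return N;
    }
    static unsigned countLeadingOnes(uint32_t V) {
      unsigned N = 0;
      while (V & 0x80000000u) { V <<= 1; ++N; }
      return N;
    }

    // 1's may appear on either or both "outsides"; all "inside" bits must be 0.
    static bool isBitFieldInvertedMask(uint32_t V) {
      if (V == 0xffffffffu) return false;
      unsigned TO = countTrailingOnes(V), LO = countLeadingOnes(V);
      V = (V >> TO) << TO;  // clear trailing ones
      V = (V << LO) >> LO;  // clear leading ones
      return V == 0;
    }

    int main() {
      assert(isBitFieldInvertedMask(0xff0000ffu));   // ones on both ends only
      assert(!isBitFieldInvertedMask(0xff00f0ffu));  // a stray 1 inside
    }
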
@@ -9412,7 +9568,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { return SDValue(); EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue LHS = Cmp.getOperand(0); SDValue RHS = Cmp.getOperand(1); SDValue FalseVal = N->getOperand(0); @@ -10358,17 +10514,15 @@ ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { bool ARM::isBitFieldInvertedMask(unsigned v) { if (v == 0xffffffff) - return 0; + return false; + // there can be 1's on either or both "outsides", all the "inside" // bits must be 0's - unsigned int lsb = 0, msb = 31; - while (v & (1 << msb)) --msb; - while (v & (1 << lsb)) ++lsb; - for (unsigned int i = lsb; i <= msb; ++i) { - if (v & (1 << i)) - return 0; - } - return 1; + unsigned TO = CountTrailingOnes_32(v); + unsigned LO = CountLeadingOnes_32(v); + v = (v >> TO) << TO; + v = (v << LO) >> LO; + return v == 0; } /// isFPImmLegal - Returns true if the target can instruction select the diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 46b8438676..2b65019df8 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -270,7 +270,7 @@ namespace llvm { } /// getSetCCResultType - Return the value type to use for ISD::SETCC. - virtual EVT getSetCCResultType(EVT VT) const; + virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const; virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, @@ -412,7 +412,7 @@ namespace llvm { void addQRTypeForNEON(MVT VT); typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector; - void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, + void PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, @@ -421,12 +421,12 @@ namespace llvm { ISD::ArgFlagsTy Flags) const; SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, - DebugLoc dl) const; + SDLoc dl) const; CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const; SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; @@ -463,7 +463,7 @@ namespace llvm { SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn, SDValue ThisVal) const; @@ -471,22 +471,26 @@ namespace llvm { LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc dl, SDValue &Chain, + SDLoc dl, SDValue &Chain, const Value *OrigArg, + unsigned InRegsParamRecordIdx, unsigned OffsetFromOrigArg, unsigned ArgOffset, + unsigned ArgSize, bool ForceMutable) const; void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc dl, SDValue &Chain, + SDLoc dl, SDValue &Chain, unsigned ArgOffset, bool ForceMutable = false) const; void computeRegArea(CCState &CCInfo, MachineFunction &MF, + unsigned InRegsParamRecordIdx, + unsigned ArgSize, unsigned &ArgRegsSize, unsigned 
&ArgRegsSaveSize) const; @@ -520,16 +524,16 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, SelectionDAG &DAG) const; + SDLoc dl, SelectionDAG &DAG) const; virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const; virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const; + SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, - SelectionDAG &DAG, DebugLoc dl) const; + SelectionDAG &DAG, SDLoc dl) const; SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const; SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 67a6820932..bd9a212928 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1389,7 +1389,6 @@ class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, let Inst{15-12} = Dd{3-0}; let Inst{7-0} = addr{7-0}; // imm8 - // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-24} = opcod1; let Inst{21-20} = opcod2; let Inst{11-9} = 0b101; @@ -1415,7 +1414,6 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, let Inst{15-12} = Sd{4-1}; let Inst{7-0} = addr{7-0}; // imm8 - // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-24} = opcod1; let Inst{21-20} = opcod2; let Inst{11-9} = 0b101; @@ -1437,6 +1435,28 @@ class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr, } // Load / store multiple + +// Unknown precision +class AXXI4<dag oops, dag iops, IndexMode im, + string asm, string cstr, list<dag> pattern> + : VFPXI<oops, iops, AddrMode4, 4, im, + VFPLdStFrm, NoItinerary, asm, cstr, pattern> { + // Instruction operands. + bits<4> Rn; + bits<13> regs; + + // Encode instruction operands. + let Inst{19-16} = Rn; + let Inst{22} = 0; + let Inst{15-12} = regs{11-8}; + let Inst{7-1} = regs{7-1}; + + let Inst{27-25} = 0b110; + let Inst{11-8} = 0b1011; + let Inst{0} = 1; +} + +// Double precision class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : VFPXI<oops, iops, AddrMode4, 4, im, @@ -1449,14 +1469,15 @@ class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, let Inst{19-16} = Rn; let Inst{22} = regs{12}; let Inst{15-12} = regs{11-8}; - let Inst{7-0} = regs{7-0}; + let Inst{7-1} = regs{7-1}; - // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-25} = 0b110; let Inst{11-9} = 0b101; let Inst{8} = 1; // Double precision + let Inst{0} = 0; } +// Single Precision class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : VFPXI<oops, iops, AddrMode4, 4, im, @@ -1471,7 +1492,6 @@ class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, let Inst{15-12} = regs{12-9}; let Inst{7-0} = regs{7-0}; - // TODO: Mark the instructions with the appropriate subtarget info. 
let Inst{27-25} = 0b110; let Inst{11-9} = 0b101; let Inst{8} = 0; // Single precision diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 80f0ec7437..8062111afa 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -29,7 +29,7 @@ using namespace llvm; ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI(*this, STI) { + : ARMBaseInstrInfo(STI), RI(STI) { } /// getNoopForMachoTarget - Return the noop instruction to use for a noop. diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 1bd174e341..da815d563d 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -275,8 +275,8 @@ def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">; def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">; def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">; -def IsLE : Predicate<"TLI.isLittleEndian()">; -def IsBE : Predicate<"TLI.isBigEndian()">; +def IsLE : Predicate<"TLI->isLittleEndian()">; +def IsBE : Predicate<"TLI->isBigEndian()">; //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -1327,7 +1327,7 @@ class AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot), IIC_iEXTr, opc, "\t$Rd, $Rm$rot", [(set GPRnopc:$Rd, (opnode (rotr GPRnopc:$Rm, rot_imm:$rot)))]>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, Sched<[WriteALUsi]> { bits<4> Rd; bits<4> Rm; bits<2> rot; @@ -1340,11 +1340,11 @@ class AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> class AI_ext_rrot_np<bits<8> opcod, string opc> : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot), IIC_iEXTr, opc, "\t$Rd, $Rm$rot", []>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, Sched<[WriteALUsi]> { bits<2> rot; let Inst{19-16} = 0b1111; let Inst{11-10} = rot; -} + } /// AI_exta_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. 
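
The AXXI4 class added above (the "unknown precision" VFP load/store multiple, i.e. the FLDMX/FSTMX-style forms whose odd operand count is marked by Inst{0} = 1) pins individual encoding bits via TableGen `let Inst{...}` statements. As a hedged C++ model of what those field assignments amount to, with a hypothetical helper (the real encoding is emitted by TableGen, and the condition and addressing-mode bits come from the base class, so this is deliberately partial):

    #include <cstdint>
    #include <cstdio>

    // Model of the AXXI4 'let Inst{...}' assignments (hypothetical helper).
    static uint32_t encodeAXXI4(uint32_t Rn, uint32_t regs) {
      uint32_t Inst = 0;
      Inst |= 0b110u << 25;                // Inst{27-25}
      Inst |= 0u << 22;                    // Inst{22} = 0
      Inst |= (Rn & 0xFu) << 16;           // Inst{19-16} = Rn
      Inst |= ((regs >> 8) & 0xFu) << 12;  // Inst{15-12} = regs{11-8}
      Inst |= 0b1011u << 8;                // Inst{11-8}
      Inst |= regs & 0xFEu;                // Inst{7-1}  = regs{7-1}
      Inst |= 1u;                          // Inst{0} = 1 marks the X form
      return Inst;
    }

    int main() { std::printf("%08x\n", (unsigned)encodeAXXI4(13, 0x205)); }
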
@@ -1353,7 +1353,7 @@ class AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot", [(set GPRnopc:$Rd, (opnode GPR:$Rn, (rotr GPRnopc:$Rm, rot_imm:$rot)))]>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, Sched<[WriteALUsr]> { bits<4> Rd; bits<4> Rm; bits<4> Rn; @@ -1368,7 +1368,7 @@ class AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> class AI_exta_rrot_np<bits<8> opcod, string opc> : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot), IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot", []>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, Sched<[WriteALUsr]> { bits<4> Rn; bits<2> rot; let Inst{19-16} = Rn; @@ -1780,7 +1780,8 @@ multiclass APreLoad<bits<1> read, bits<1> data, string opc> { def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload, !strconcat(opc, "\t$addr"), - [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]> { + [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]>, + Sched<[WritePreLd]> { bits<4> Rt; bits<17> addr; let Inst{31-26} = 0b111101; @@ -1796,7 +1797,8 @@ multiclass APreLoad<bits<1> read, bits<1> data, string opc> { def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload, !strconcat(opc, "\t$shift"), - [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]> { + [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]>, + Sched<[WritePreLd]> { bits<17> shift; let Inst{31-26} = 0b111101; let Inst{25} = 1; // 1 for register form @@ -1863,7 +1865,8 @@ def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, let isNotDuplicable = 1 in { def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), 4, IIC_iALUr, - [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>; + [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>, + Sched<[WriteALU, ReadALU]>; let AddedComplexity = 10 in { def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), @@ -1923,11 +1926,11 @@ def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label), let hasSideEffects = 1 in { def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p), - 4, IIC_iALUi, []>; + 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), - 4, IIC_iALUi, []>; + 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; } //===----------------------------------------------------------------------===// @@ -1938,14 +1941,14 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { // ARMV4T and above def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br, "bx", "\tlr", [(ARMretflag)]>, - Requires<[IsARM, HasV4T]> { + Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> { let Inst{27-0} = 0b0001001011111111111100011110; } // ARMV4 only def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br, "mov", "\tpc, lr", [(ARMretflag)]>, - Requires<[IsARM, NoV4T]> { + Requires<[IsARM, NoV4T]>, Sched<[WriteBr]> { let Inst{27-0} = 0b0001101000001111000000001110; } } @@ -1955,7 +1958,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // ARMV4T and above def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst", [(brind GPR:$dst)]>, - Requires<[IsARM, HasV4T]> { + Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> { bits<4> dst; let Inst{31-4} = 0b1110000100101111111111110001; let Inst{3-0} = dst; @@ -1963,7 +1966,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def BX_pred : AI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx", "\t$dst", [/* pattern left blank 
*/]>, - Requires<[IsARM, HasV4T]> { + Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> { bits<4> dst; let Inst{27-4} = 0b000100101111111111110001; let Inst{3-0} = dst; @@ -1980,7 +1983,7 @@ let isCall = 1, def BL : ABXI<0b1011, (outs), (ins bl_target:$func), IIC_Br, "bl\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsARM]> { + Requires<[IsARM]>, Sched<[WriteBrL]> { let Inst{31-28} = 0b1110; bits<24> func; let Inst{23-0} = func; @@ -1990,7 +1993,7 @@ let isCall = 1, def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func), IIC_Br, "bl", "\t$func", [(ARMcall_pred tglobaladdr:$func)]>, - Requires<[IsARM]> { + Requires<[IsARM]>, Sched<[WriteBrL]> { bits<24> func; let Inst{23-0} = func; let DecoderMethod = "DecodeBranchImmInstruction"; @@ -2000,7 +2003,7 @@ let isCall = 1, def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm, IIC_Br, "blx\t$func", [(ARMcall GPR:$func)]>, - Requires<[IsARM, HasV5T]> { + Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { bits<4> func; let Inst{31-4} = 0b1110000100101111111111110011; let Inst{3-0} = func; @@ -2009,7 +2012,7 @@ let isCall = 1, def BLX_pred : AI<(outs), (ins GPR:$func), BrMiscFrm, IIC_Br, "blx", "\t$func", [(ARMcall_pred GPR:$func)]>, - Requires<[IsARM, HasV5T]> { + Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { bits<4> func; let Inst{27-4} = 0b000100101111111111110011; let Inst{3-0} = func; @@ -2019,18 +2022,18 @@ let isCall = 1, // Note: Restrict $func to the tGPR regclass to prevent it being in LR. def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, HasV4T]>; + Requires<[IsARM, HasV4T]>, Sched<[WriteBr]>; // ARMv4 def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, NoV4T]>; + Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>; // mov lr, pc; b if callee is marked noreturn to avoid confusing the // return stack predictor. def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins bl_target:$func), 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsARM]>; + Requires<[IsARM]>, Sched<[WriteBr]>; } let isBranch = 1, isTerminator = 1 in { @@ -2038,7 +2041,8 @@ let isBranch = 1, isTerminator = 1 in { // a two-value operand where a dag node expects two operands. :( def Bcc : ABI<0b1010, (outs), (ins br_target:$target), IIC_Br, "b", "\t$target", - [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> { + [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>, + Sched<[WriteBr]> { bits<24> target; let Inst{23-0} = target; let DecoderMethod = "DecodeBranchImmInstruction"; @@ -2051,25 +2055,27 @@ let isBranch = 1, isTerminator = 1 in { // should be sufficient. // FIXME: Is B really a Barrier? That doesn't seem right. def B : ARMPseudoExpand<(outs), (ins br_target:$target), 4, IIC_Br, - [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>; + [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>, + Sched<[WriteBr]>; let isNotDuplicable = 1, isIndirectBranch = 1 in { def BR_JTr : ARMPseudoInst<(outs), (ins GPR:$target, i32imm:$jt, i32imm:$id), 0, IIC_Br, - [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>; + [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>, + Sched<[WriteBr]>; // FIXME: This shouldn't use the generic "addrmode2," but rather be split // into i12 and rs suffixed versions. 
def BR_JTm : ARMPseudoInst<(outs), (ins addrmode2:$target, i32imm:$jt, i32imm:$id), 0, IIC_Br, [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt, - imm:$id)]>; + imm:$id)]>, Sched<[WriteBrTbl]>; def BR_JTadd : ARMPseudoInst<(outs), (ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id), 0, IIC_Br, [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, - imm:$id)]>; + imm:$id)]>, Sched<[WriteBrTbl]>; } // isNotDuplicable = 1, isIndirectBranch = 1 } // isBarrier = 1 @@ -2078,7 +2084,7 @@ let isBranch = 1, isTerminator = 1 in { // BLX (immediate) def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary, "blx\t$target", []>, - Requires<[IsARM, HasV5T]> { + Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { let Inst{31-25} = 0b1111101; bits<25> target; let Inst{23-0} = target{24-1}; @@ -2087,7 +2093,7 @@ def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary, // Branch and Exchange Jazelle def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", - [/* pattern left blank */]> { + [/* pattern left blank */]>, Sched<[WriteBr]> { bits<4> func; let Inst{23-20} = 0b0010; let Inst{19-8} = 0xfff; @@ -2098,18 +2104,20 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", // Tail calls. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { - def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>; + def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>, + Sched<[WriteBr]>; - def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>; + def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>, + Sched<[WriteBr]>; def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst), 4, IIC_Br, [], (Bcc br_target:$dst, (ops 14, zero_reg))>, - Requires<[IsARM]>; + Requires<[IsARM]>, Sched<[WriteBr]>; def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst), 4, IIC_Br, [], - (BX GPR:$dst)>, + (BX GPR:$dst)>, Sched<[WriteBr]>, Requires<[IsARM]>; } @@ -2123,7 +2131,8 @@ def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", // Supervisor Call (Software Interrupt) let isCall = 1, Uses = [SP] in { -def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []> { +def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []>, + Sched<[WriteBr]> { bits<24> svc; let Inst{23-0} = svc; } @@ -2955,7 +2964,7 @@ defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m, let neverHasSideEffects = 1 in def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, - "mov", "\t$Rd, $Rm", []>, UnaryDP { + "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<4> Rm; @@ -2969,7 +2978,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, // A version for the smaller set of tail call registers. 
let neverHasSideEffects = 1 in def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, - IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP { + IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<4> Rm; @@ -2982,7 +2991,8 @@ def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src), DPSoRegRegFrm, IIC_iMOVsr, "mov", "\t$Rd, $src", - [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP { + [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP, + Sched<[WriteALU]> { bits<4> Rd; bits<12> src; let Inst{15-12} = Rd; @@ -2998,7 +3008,7 @@ def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src), def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src), DPSoRegImmFrm, IIC_iMOVsr, "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg_imm:$src)]>, - UnaryDP { + UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<12> src; let Inst{15-12} = Rd; @@ -3011,7 +3021,8 @@ def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src), let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi, - "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP { + "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP, + Sched<[WriteALU]> { bits<4> Rd; bits<12> imm; let Inst{25} = 1; @@ -3025,7 +3036,7 @@ def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins imm0_65535_expr:$imm), DPFrm, IIC_iMOVi, "movw", "\t$Rd, $imm", [(set GPR:$Rd, imm0_65535:$imm)]>, - Requires<[IsARM, HasV6T2]>, UnaryDP { + Requires<[IsARM, HasV6T2]>, UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<16> imm; let Inst{15-12} = Rd; @@ -3041,7 +3052,8 @@ def : InstAlias<"mov${p} $Rd, $imm", Requires<[IsARM]>; def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), - (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; + (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, + Sched<[WriteALU]>; let Constraints = "$src = $Rd" in { def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd), @@ -3051,7 +3063,7 @@ def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd), [(set GPRnopc:$Rd, (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>, UnaryDP, - Requires<[IsARM, HasV6T2]> { + Requires<[IsARM, HasV6T2]>, Sched<[WriteALU]> { bits<4> Rd; bits<16> imm; let Inst{15-12} = Rd; @@ -3063,7 +3075,8 @@ def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd), } def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), - (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; + (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, + Sched<[WriteALU]>; } // Constraints @@ -3073,7 +3086,7 @@ def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>, let Uses = [CPSR] in def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi, [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP, - Requires<[IsARM]>; + Requires<[IsARM]>, Sched<[WriteALU]>; // These aren't really mov instructions, but we have to define them this way // due to flag operands. 
@@ -3081,10 +3094,10 @@ def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi, let Defs = [CPSR] in { def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP, - Requires<[IsARM]>; + Sched<[WriteALU]>, Requires<[IsARM]>; def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP, - Requires<[IsARM]>; + Sched<[WriteALU]>, Requires<[IsARM]>; } //===----------------------------------------------------------------------===// @@ -3250,7 +3263,8 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc, list<dag> pattern = [], dag iops = (ins GPRnopc:$Rn, GPRnopc:$Rm), string asm = "\t$Rd, $Rn, $Rm"> - : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> { + : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern>, + Sched<[WriteALU, ReadALU, ReadALU]> { bits<4> Rn; bits<4> Rd; bits<4> Rm; @@ -3265,9 +3279,11 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc, // Saturating add/subtract +let DecoderMethod = "DecodeQADDInstruction" in def QADD : AAI<0b00010000, 0b00000101, "qadd", [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))], (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; + def QSUB : AAI<0b00010010, 0b00000101, "qsub", [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))], (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; @@ -3326,7 +3342,7 @@ def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">; def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), MulFrm /* for convenience */, NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -3340,7 +3356,7 @@ def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), MulFrm /* for convenience */, NoItinerary, "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]>{ bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -3473,7 +3489,7 @@ def BFI:I<(outs GPRnopc:$Rd), (ins GPRnopc:$src, GPR:$Rn, bf_inv_mask_imm:$imm), def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr, "mvn", "\t$Rd, $Rm", - [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP { + [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<4> Rm; let Inst{25} = 0; @@ -3484,7 +3500,8 @@ def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr, } def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift), DPSoRegImmFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift", - [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP { + [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP, + Sched<[WriteALU]> { bits<4> Rd; bits<12> shift; let Inst{25} = 0; @@ -3496,7 +3513,8 @@ def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift), } def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift), DPSoRegRegFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift", - [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP { + [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP, + Sched<[WriteALU]> { bits<4> Rd; bits<12> shift; let Inst{25} = 0; @@ -3511,7 +3529,7 @@ def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift), let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMVNi, "mvn", "\t$Rd, $imm", - [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP { + 
[(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<12> imm; let Inst{25} = 1; @@ -4022,7 +4040,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs), let isCompare = 1, Defs = [CPSR] in { def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi, "cmn", "\t$Rn, $imm", - [(ARMcmn GPR:$Rn, so_imm:$imm)]> { + [(ARMcmn GPR:$Rn, so_imm:$imm)]>, + Sched<[WriteCMP, ReadALU]> { bits<4> Rn; bits<12> imm; let Inst{25} = 1; @@ -4038,7 +4057,7 @@ def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi, def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr, "cmn", "\t$Rn, $Rm", [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPR:$Rn, GPR:$Rm)]> { + GPR:$Rn, GPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> { bits<4> Rn; bits<4> Rm; let isCommutable = 1; @@ -4056,7 +4075,8 @@ def CMNzrsi : AI1<0b1011, (outs), (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, IIC_iCMPsr, "cmn", "\t$Rn, $shift", [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPR:$Rn, so_reg_imm:$shift)]> { + GPR:$Rn, so_reg_imm:$shift)]>, + Sched<[WriteCMPsi, ReadALU]> { bits<4> Rn; bits<12> shift; let Inst{25} = 0; @@ -4074,7 +4094,8 @@ def CMNzrsr : AI1<0b1011, (outs), (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iCMPsr, "cmn", "\t$Rn, $shift", [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPRnopc:$Rn, so_reg_reg:$shift)]> { + GPRnopc:$Rn, so_reg_reg:$shift)]>, + Sched<[WriteCMPsr, ReadALU]> { bits<4> Rn; bits<12> shift; let Inst{25} = 0; @@ -4112,11 +4133,13 @@ let usesCustomInserter = 1, isBranch = 1, isTerminator = 1, def BCCi64 : PseudoInst<(outs), (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst), IIC_Br, - [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>; + [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>, + Sched<[WriteBr]>; def BCCZi64 : PseudoInst<(outs), (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br, - [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>; + [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>, + Sched<[WriteBr]>; } // usesCustomInserter @@ -4129,20 +4152,20 @@ let isCommutable = 1, isSelect = 1 in def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p), 4, IIC_iCMOVr, [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">; + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, so_reg_imm:$shift, pred:$p), 4, IIC_iCMOVsr, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_imm:$shift, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">; + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, so_reg_reg:$shift, pred:$p), 4, IIC_iCMOVsr, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">; + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; let isMoveImm = 1 in @@ -4150,14 +4173,15 @@ def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, imm0_65535_expr:$imm, pred:$p), 4, IIC_iMOVi, []>, - RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>; + RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>, + Sched<[WriteALU]>; let isMoveImm = 1 in def MOVCCi : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, so_imm:$imm, pred:$p), 4, IIC_iCMOVi, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = 
$Rd">; + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; // Two instruction predicate mov immediate. let isMoveImm = 1 in @@ -4170,7 +4194,7 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, so_imm:$imm, pred:$p), 4, IIC_iCMOVi, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">; + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; } // neverHasSideEffects @@ -4189,6 +4213,16 @@ def memb_opt : Operand<i32> { let DecoderMethod = "DecodeMemBarrierOption"; } +def InstSyncBarrierOptOperand : AsmOperandClass { + let Name = "InstSyncBarrierOpt"; + let ParserMethod = "parseInstSyncBarrierOptOperand"; +} +def instsyncb_opt : Operand<i32> { + let PrintMethod = "printInstSyncBOption"; + let ParserMatchClass = InstSyncBarrierOptOperand; + let DecoderMethod = "DecodeInstSyncBarrierOption"; +} + // memory barriers protect the atomic sequences let hasSideEffects = 1 in { def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, @@ -4209,7 +4243,7 @@ def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, } // ISB has only full system option -def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, +def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary, "isb", "\t$opt", []>, Requires<[IsARM, HasDB]> { bits<4> opt; @@ -4636,11 +4670,11 @@ def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0, pred:$p)>; def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, - (outs GPR:$Rt), + (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm", - (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + (MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0, pred:$p)>; def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), @@ -4650,7 +4684,7 @@ class MovRCopro2<string opc, bit direction, dag oops, dag iops, list<dag> pattern> : ABXI<0b1110, oops, iops, NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), pattern> { - let Inst{31-28} = 0b1111; + let Inst{31-24} = 0b11111110; let Inst{20} = direction; let Inst{4} = 1; @@ -4679,11 +4713,11 @@ def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm", (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0)>; def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, - (outs GPR:$Rt), + (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm", - (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0)>; def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, @@ -4820,7 +4854,7 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in { def TPsoft : PseudoInst<(outs), (ins), IIC_Br, - [(set R0, ARMthread_pointer)]>; + [(set R0, ARMthread_pointer)]>, Sched<[WriteBr]>; } //===----------------------------------------------------------------------===// @@ -4884,7 +4918,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in def MOVPCRX : ARMPseudoExpand<(outs), (ins GPR:$dst), 4, IIC_Br, [(brind GPR:$dst)], (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>, - Requires<[IsARM, NoV4T]>; + Requires<[IsARM, 
NoV4T]>, Sched<[WriteBr]>; // Large immediate handling. @@ -5233,7 +5267,7 @@ def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm", cc_out:$s)>; } def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm", - (ins GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>; + (ins GPR:$Rd, GPR:$Rm, pred:$p, cc_out:$s)>; let TwoOperandAliasConstraint = "$Rn = $Rd" in { def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm", (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 896fd0f785..9d1a8ea38b 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -626,7 +626,7 @@ class VLD1D<bits<4> op7_4, string Dt> "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } class VLD1Q<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), @@ -634,7 +634,7 @@ class VLD1Q<bits<4> op7_4, string Dt> "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def VLD1d8 : VLD1D<{0,0,0,?}, "8">; @@ -655,7 +655,7 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), @@ -663,7 +663,7 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt> { "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -674,7 +674,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), @@ -682,7 +682,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> { "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -703,7 +703,7 @@ class VLD1D3<bits<4> op7_4, string Dt> "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VLD1D3WB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), @@ -712,7 +712,7 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), @@ -720,7 +720,7 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> { "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -744,7 +744,7 @@ class VLD1D4<bits<4> op7_4, string Dt> "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VLD1D4WB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), @@ -753,7 +753,7 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), @@ -761,7 +761,7 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> { "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -786,7 +786,7 @@ class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>; @@ -810,7 +810,7 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), @@ -818,7 +818,7 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -853,7 +853,7 @@ class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; @@ -872,7 +872,7 @@ class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; @@ -912,7 +912,7 @@ class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; @@ -931,7 +931,7 @@ class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; @@ -1580,14 +1580,14 @@ class VST1D<bits<4> op7_4, string Dt> IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } class VST1Q<bits<4> op7_4, string Dt> : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def VST1d8 : VST1D<{0,0,0,?}, "8">; @@ -1608,7 +1608,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), @@ -1617,7 +1617,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1628,7 +1628,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), @@ -1637,7 +1637,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1659,7 +1659,7 @@ class VST1D3<bits<4> op7_4, string Dt> IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VST1D3WB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), @@ -1668,7 +1668,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), @@ -1677,7 +1677,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1704,7 +1704,7 @@ class VST1D4<bits<4> op7_4, string Dt> []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VST1D4WB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), @@ -1713,7 +1713,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), @@ -1722,7 +1722,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1748,7 +1748,7 @@ class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, itin, "vst2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>; @@ -1772,7 +1772,7 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), @@ -1780,7 +1780,7 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1791,7 +1791,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), @@ -1800,7 +1800,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> { "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1835,7 +1835,7 @@ class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; @@ -1854,7 +1854,7 @@ class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; @@ -1894,7 +1894,7 @@ class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; @@ -1913,7 +1913,7 @@ class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; @@ -5509,8 +5509,9 @@ class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn), (Ty DPR:$Vm), imm:$index)))]> { - bits<4> index; - let Inst{11-8} = index{3-0}; + bits<3> index; + let Inst{11} = 0b0; + let Inst{10-8} = index{2-0}; } class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> @@ -5525,14 +5526,14 @@ class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> } def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { - let Inst{11-8} = index{3-0}; + let Inst{10-8} = index{2-0}; } def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> { - let Inst{11-9} = index{2-0}; + let Inst{10-9} = index{1-0}; let Inst{8} = 0b0; } def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> { - let Inst{11-10} = index{1-0}; + let Inst{10} = index{0}; let Inst{9-8} = 0b00; } def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index ae7a5c00bd..1fff41db27 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ 
-310,7 +310,7 @@ def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags), let isNotDuplicable = 1, isCodeGenOnly = 1 in def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "", [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>, - T1Special<{0,0,?,?}> { + T1Special<{0,0,?,?}>, Sched<[WriteALU]> { // A8.6.6 bits<3> dst; let Inst{6-3} = 0b1111; // Rm = pc @@ -323,7 +323,7 @@ def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "", // probably because the instruction can be moved around. def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm), IIC_iALUi, "add", "\t$dst, $sp, $imm", []>, - T1Encoding<{1,0,1,0,1,?}> { + T1Encoding<{1,0,1,0,1,?}>, Sched<[WriteALU]> { // A6.2 & A8.6.8 bits<3> dst; bits<8> imm; @@ -335,7 +335,7 @@ def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm), // ADD sp, sp, #<imm7> def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), IIC_iALUi, "add", "\t$Rdn, $imm", []>, - T1Misc<{0,0,0,0,0,?,?}> { + T1Misc<{0,0,0,0,0,?,?}>, Sched<[WriteALU]> { // A6.2.5 & A8.6.8 bits<7> imm; let Inst{6-0} = imm; @@ -346,7 +346,7 @@ def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), // FIXME: The encoding and the ASM string don't match up. def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), IIC_iALUi, "sub", "\t$Rdn, $imm", []>, - T1Misc<{0,0,0,0,1,?,?}> { + T1Misc<{0,0,0,0,1,?,?}>, Sched<[WriteALU]> { // A6.2.5 & A8.6.214 bits<7> imm; let Inst{6-0} = imm; @@ -367,7 +367,7 @@ def : tInstAlias<"sub${p} sp, sp, $imm", // ADD <Rm>, sp def tADDrSP : T1pI<(outs GPR:$Rdn), (ins GPRsp:$sp, GPR:$Rn), IIC_iALUr, "add", "\t$Rdn, $sp, $Rn", []>, - T1Special<{0,0,?,?}> { + T1Special<{0,0,?,?}>, Sched<[WriteALU]> { // A8.6.9 Encoding T1 bits<4> Rdn; let Inst{7} = Rdn{3}; @@ -379,7 +379,7 @@ def tADDrSP : T1pI<(outs GPR:$Rdn), (ins GPRsp:$sp, GPR:$Rn), IIC_iALUr, // ADD sp, <Rm> def tADDspr : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, GPR:$Rm), IIC_iALUr, "add", "\t$Rdn, $Rm", []>, - T1Special<{0,0,?,?}> { + T1Special<{0,0,?,?}>, Sched<[WriteALU]> { // A8.6.9 Encoding T2 bits<4> Rm; let Inst{7} = 1; @@ -395,7 +395,7 @@ def tADDspr : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, GPR:$Rm), IIC_iALUr, // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def tBX : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bx${p}\t$Rm", []>, - T1Special<{1,1,0,?}> { + T1Special<{1,1,0,?}>, Sched<[WriteBr]> { // A6.2.3 & A8.6.25 bits<4> Rm; let Inst{6-3} = Rm; @@ -406,12 +406,12 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { let isReturn = 1, isTerminator = 1, isBarrier = 1 in { def tBX_RET : tPseudoExpand<(outs), (ins pred:$p), 2, IIC_Br, - [(ARMretflag)], (tBX LR, pred:$p)>; + [(ARMretflag)], (tBX LR, pred:$p)>, Sched<[WriteBr]>; // Alternative return instruction used by vararg functions. def tBX_RET_vararg : tPseudoExpand<(outs), (ins tGPR:$Rm, pred:$p), 2, IIC_Br, [], - (tBX GPR:$Rm, pred:$p)>; + (tBX GPR:$Rm, pred:$p)>, Sched<[WriteBr]>; } // All calls clobber the non-callee saved registers. 
SP is marked as a use to @@ -424,7 +424,7 @@ let isCall = 1, (outs), (ins pred:$p, t_bltarget:$func), IIC_Br, "bl${p}\t$func", [(ARMtcall tglobaladdr:$func)]>, - Requires<[IsThumb]> { + Requires<[IsThumb]>, Sched<[WriteBrL]> { bits<24> func; let Inst{26} = func{23}; let Inst{25-16} = func{20-11}; @@ -438,7 +438,7 @@ let isCall = 1, (outs), (ins pred:$p, t_blxtarget:$func), IIC_Br, "blx${p}\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsThumb, HasV5T]> { + Requires<[IsThumb, HasV5T]>, Sched<[WriteBrL]> { bits<24> func; let Inst{26} = func{23}; let Inst{25-16} = func{20-11}; @@ -453,7 +453,7 @@ let isCall = 1, "blx${p}\t$func", [(ARMtcall GPR:$func)]>, Requires<[IsThumb, HasV5T]>, - T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24; + T1Special<{1,1,1,?}>, Sched<[WriteBrL]> { // A6.2.3 & A8.6.24; bits<4> func; let Inst{6-3} = func; let Inst{2-0} = 0b000; @@ -463,14 +463,14 @@ let isCall = 1, def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func), 4, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsThumb, IsThumb1Only]>; + Requires<[IsThumb, IsThumb1Only]>, Sched<[WriteBr]>; } let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in def tB : T1pI<(outs), (ins t_brtarget:$target), IIC_Br, "b", "\t$target", [(br bb:$target)]>, - T1Encoding<{1,1,1,0,0,?}> { + T1Encoding<{1,1,1,0,0,?}>, Sched<[WriteBr]> { bits<11> target; let Inst{10-0} = target; } @@ -480,12 +480,14 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { // the clobber of LR. let Defs = [LR] in def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target, pred:$p), - 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>; + 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>, + Sched<[WriteBrTbl]>; def tBR_JTr : tPseudoInst<(outs), (ins tGPR:$target, i32imm:$jt, i32imm:$id), 0, IIC_Br, - [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]> { + [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>, + Sched<[WriteBrTbl]> { list<Predicate> Predicates = [IsThumb, IsThumb1Only]; } } @@ -496,7 +498,7 @@ let isBranch = 1, isTerminator = 1 in def tBcc : T1I<(outs), (ins t_bcctarget:$target, pred:$p), IIC_Br, "b${p}\t$target", [/*(ARMbrcond bb:$target, imm:$cc)*/]>, - T1BranchCond<{1,1,0,1}> { + T1BranchCond<{1,1,0,1}>, Sched<[WriteBr]> { bits<4> p; bits<8> target; let Inst{11-8} = p; @@ -510,7 +512,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst), 4, IIC_Br, [], (tBX GPR:$dst, (ops 14, zero_reg))>, - Requires<[IsThumb]>; + Requires<[IsThumb]>, Sched<[WriteBr]>; } // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls // on IOS), so it's in ARMInstrThumb2.td. @@ -520,7 +522,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { (ins t_brtarget:$dst, pred:$p), 4, IIC_Br, [], (tB t_brtarget:$dst, pred:$p)>, - Requires<[IsThumb, IsNotIOS]>; + Requires<[IsThumb, IsNotIOS]>, Sched<[WriteBr]>; } } @@ -530,7 +532,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // If Inst{11-8} == 0b1111 then SEE SVC let isCall = 1, Uses = [SP] in def tSVC : T1pI<(outs), (ins imm0_255:$imm), IIC_Br, - "svc", "\t$imm", []>, Encoding16 { + "svc", "\t$imm", []>, Encoding16, Sched<[WriteBr]> { bits<8> imm; let Inst{15-12} = 0b1101; let Inst{11-8} = 0b1111; @@ -540,7 +542,7 @@ def tSVC : T1pI<(outs), (ins imm0_255:$imm), IIC_Br, // The assembler uses 0xDEFE for a trap instruction. 
let isBarrier = 1, isTerminator = 1 in def tTRAP : TI<(outs), (ins), IIC_Br, - "trap", [(trap)]>, Encoding16 { + "trap", [(trap)]>, Encoding16, Sched<[WriteBr]> { let Inst = 0xdefe; } @@ -833,14 +835,15 @@ let isCommutable = 1, Uses = [CPSR] in def tADC : // A8.6.2 T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr, "adc", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; // Add immediate def tADDi3 : // A8.6.4 T1 T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3), IIC_iALUi, "add", "\t$Rd, $Rm, $imm3", - [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> { + [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]>, + Sched<[WriteALU]> { bits<3> imm3; let Inst{8-6} = imm3; } @@ -849,7 +852,8 @@ def tADDi8 : // A8.6.4 T2 T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi, "add", "\t$Rdn, $imm8", - [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>; + [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>, + Sched<[WriteALU]>; // Add register let isCommutable = 1 in @@ -857,12 +861,12 @@ def tADDrr : // A8.6.6 T1 T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr, "add", "\t$Rd, $Rn, $Rm", - [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; let neverHasSideEffects = 1 in def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr, "add", "\t$Rdn, $Rm", []>, - T1Special<{0,0,?,?}> { + T1Special<{0,0,?,?}>, Sched<[WriteALU]> { // A8.6.6 T2 bits<4> Rdn; bits<4> Rm; @@ -877,14 +881,15 @@ def tAND : // A8.6.12 T1sItDPEncode<0b0000, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iBITr, "and", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; // ASR immediate def tASRri : // A8.6.14 T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5), IIC_iMOVsi, "asr", "\t$Rd, $Rm, $imm5", - [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm_sr:$imm5)))]> { + [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm_sr:$imm5)))]>, + Sched<[WriteALU]> { bits<5> imm5; let Inst{10-6} = imm5; } @@ -894,14 +899,15 @@ def tASRrr : // A8.6.15 T1sItDPEncode<0b0100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iMOVsr, "asr", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; // BIC register def tBIC : // A8.6.20 T1sItDPEncode<0b1110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iBITr, "bic", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>; + [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>, + Sched<[WriteALU]>; // CMN register let isCompare = 1, Defs = [CPSR] in { @@ -917,7 +923,7 @@ def tCMNz : // A8.6.33 T1pIDPEncode<0b1011, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iCMPr, "cmn", "\t$Rn, $Rm", - [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>; + [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>, Sched<[WriteCMP]>; } // isCompare = 1, Defs = [CPSR] @@ -926,7 +932,7 @@ let isCompare = 1, Defs = [CPSR] in { def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iCMPi, "cmp", "\t$Rn, $imm8", [(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>, - T1General<{1,0,1,?,?}> { + T1General<{1,0,1,?,?}>, Sched<[WriteCMP]> { // A8.6.35 bits<3> Rn; bits<8> imm8; @@ -939,11 +945,11 @@ def tCMPr : // A8.6.36 T1 T1pIDPEncode<0b1010, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iCMPr, "cmp", "\t$Rn, $Rm", - [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>; + [(ARMcmp 
tGPR:$Rn, tGPR:$Rm)]>, Sched<[WriteCMP]>; def tCMPhir : T1pI<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_iCMPr, "cmp", "\t$Rn, $Rm", []>, - T1Special<{0,1,?,?}> { + T1Special<{0,1,?,?}>, Sched<[WriteCMP]> { // A8.6.36 T2 bits<4> Rm; bits<4> Rn; @@ -960,14 +966,15 @@ def tEOR : // A8.6.45 T1sItDPEncode<0b0001, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iBITr, "eor", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; // LSL immediate def tLSLri : // A8.6.88 T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_31:$imm5), IIC_iMOVsi, "lsl", "\t$Rd, $Rm, $imm5", - [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]> { + [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]>, + Sched<[WriteALU]> { bits<5> imm5; let Inst{10-6} = imm5; } @@ -977,14 +984,15 @@ def tLSLrr : // A8.6.89 T1sItDPEncode<0b0010, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iMOVsr, "lsl", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; // LSR immediate def tLSRri : // A8.6.90 T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5), IIC_iMOVsi, "lsr", "\t$Rd, $Rm, $imm5", - [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm_sr:$imm5)))]> { + [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm_sr:$imm5)))]>, + Sched<[WriteALU]> { bits<5> imm5; let Inst{10-6} = imm5; } @@ -994,14 +1002,14 @@ def tLSRrr : // A8.6.91 T1sItDPEncode<0b0011, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iMOVsr, "lsr", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; // Move register let isMoveImm = 1 in def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi, "mov", "\t$Rd, $imm8", [(set tGPR:$Rd, imm0_255:$imm8)]>, - T1General<{1,0,0,?,?}> { + T1General<{1,0,0,?,?}>, Sched<[WriteALU]> { // A8.6.96 bits<3> Rd; bits<8> imm8; @@ -1019,7 +1027,7 @@ let neverHasSideEffects = 1 in { def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone, 2, IIC_iMOVr, "mov", "\t$Rd, $Rm", "", []>, - T1Special<{1,0,?,?}> { + T1Special<{1,0,?,?}>, Sched<[WriteALU]> { // A8.6.97 bits<4> Rd; bits<4> Rm; @@ -1029,7 +1037,7 @@ def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone, } let Defs = [CPSR] in def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr, - "movs\t$Rd, $Rm", []>, Encoding16 { + "movs\t$Rd, $Rm", []>, Encoding16, Sched<[WriteALU]> { // A8.6.97 bits<3> Rd; bits<3> Rm; @@ -1060,7 +1068,7 @@ def :tInstAlias<"mul${s}${p} $Rdm, $Rn", (tMUL tGPR:$Rdm, s_cc_out:$s, tGPR:$Rn, def tMVN : // A8.6.107 T1sIDPEncode<0b1111, (outs tGPR:$Rd), (ins tGPR:$Rn), IIC_iMVNr, "mvn", "\t$Rd, $Rn", - [(set tGPR:$Rd, (not tGPR:$Rn))]>; + [(set tGPR:$Rd, (not tGPR:$Rn))]>, Sched<[WriteALU]>; // Bitwise or register let isCommutable = 1 in @@ -1068,7 +1076,7 @@ def tORR : // A8.6.114 T1sItDPEncode<0b1100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iBITr, "orr", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; // Swaps def tREV : // A8.6.134 @@ -1076,35 +1084,36 @@ def tREV : // A8.6.134 IIC_iUNAr, "rev", "\t$Rd, $Rm", [(set tGPR:$Rd, (bswap tGPR:$Rm))]>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; + Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>; def tREV16 : // A8.6.135 T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iUNAr, "rev16", "\t$Rd, $Rm", [(set tGPR:$Rd, (rotr (bswap tGPR:$Rm), 
(i32 16)))]>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; + Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>; def tREVSH : // A8.6.136 T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iUNAr, "revsh", "\t$Rd, $Rm", [(set tGPR:$Rd, (sra (bswap tGPR:$Rm), (i32 16)))]>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; + Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>; // Rotate right register def tROR : // A8.6.139 T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iMOVsr, "ror", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>, + Sched<[WriteALU]>; // Negate register def tRSB : // A8.6.141 T1sIDPEncode<0b1001, (outs tGPR:$Rd), (ins tGPR:$Rn), IIC_iALUi, "rsb", "\t$Rd, $Rn, #0", - [(set tGPR:$Rd, (ineg tGPR:$Rn))]>; + [(set tGPR:$Rd, (ineg tGPR:$Rn))]>, Sched<[WriteALU]>; // Subtract with carry register let Uses = [CPSR] in @@ -1112,14 +1121,16 @@ def tSBC : // A8.6.151 T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr, "sbc", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>, + Sched<[WriteALU]>; // Subtract immediate def tSUBi3 : // A8.6.210 T1 T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3), IIC_iALUi, "sub", "\t$Rd, $Rm, $imm3", - [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]> { + [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]>, + Sched<[WriteALU]> { bits<3> imm3; let Inst{8-6} = imm3; } @@ -1128,14 +1139,16 @@ def tSUBi8 : // A8.6.210 T2 T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi, "sub", "\t$Rdn, $imm8", - [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>; + [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>, + Sched<[WriteALU]>; // Subtract register def tSUBrr : // A8.6.212 T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr, "sub", "\t$Rd, $Rn, $Rm", - [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>, + Sched<[WriteALU]>; // Sign-extend byte def tSXTB : // A8.6.222 @@ -1143,7 +1156,8 @@ def tSXTB : // A8.6.222 IIC_iUNAr, "sxtb", "\t$Rd, $Rm", [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i8))]>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; + Requires<[IsThumb, IsThumb1Only, HasV6]>, + Sched<[WriteALU]>; // Sign-extend short def tSXTH : // A8.6.224 @@ -1151,14 +1165,16 @@ def tSXTH : // A8.6.224 IIC_iUNAr, "sxth", "\t$Rd, $Rm", [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i16))]>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; + Requires<[IsThumb, IsThumb1Only, HasV6]>, + Sched<[WriteALU]>; // Test let isCompare = 1, isCommutable = 1, Defs = [CPSR] in def tTST : // A8.6.230 T1pIDPEncode<0b1000, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iTSTr, "tst", "\t$Rn, $Rm", - [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>; + [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>, + Sched<[WriteALU]>; // Zero-extend byte def tUXTB : // A8.6.262 @@ -1166,7 +1182,8 @@ def tUXTB : // A8.6.262 IIC_iUNAr, "uxtb", "\t$Rd, $Rm", [(set tGPR:$Rd, (and tGPR:$Rm, 0xFF))]>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; + Requires<[IsThumb, IsThumb1Only, HasV6]>, + Sched<[WriteALU]>; // Zero-extend short def tUXTH : // A8.6.264 @@ -1174,7 +1191,7 @@ def tUXTH : // A8.6.264 IIC_iUNAr, "uxth", "\t$Rd, $Rm", [(set tGPR:$Rd, (and tGPR:$Rm, 0xFFFF))]>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; + Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>; // Conditional move tMOVCCr - Used to 
implement the Thumb SELECT_CC operation. // Expanded after instruction selection into a branch sequence. @@ -1189,7 +1206,7 @@ let usesCustomInserter = 1 in // Expanded after instruction selection. def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p), IIC_iALUi, "adr{$p}\t$Rd, $addr", []>, - T1Encoding<{1,0,1,0,0,?}> { + T1Encoding<{1,0,1,0,0,?}>, Sched<[WriteALU]> { bits<3> Rd; bits<8> addr; let Inst{10-8} = Rd; @@ -1199,12 +1216,12 @@ def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p), let neverHasSideEffects = 1, isReMaterializable = 1 in def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p), - 2, IIC_iALUi, []>; + 2, IIC_iALUi, []>, Sched<[WriteALU]>; let hasSideEffects = 1 in def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), - 2, IIC_iALUi, []>; + 2, IIC_iALUi, []>, Sched<[WriteALU]>; //===----------------------------------------------------------------------===// // TLS Instructions @@ -1215,7 +1232,8 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), // complete with fixup for the aeabi_read_tp function. let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br, - [(set R0, ARMthread_pointer)]>; + [(set R0, ARMthread_pointer)]>, + Sched<[WriteBr]>; //===----------------------------------------------------------------------===// // SJLJ Exception handling intrinsics @@ -1381,13 +1399,13 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in def tPOP_RET : tPseudoExpand<(outs), (ins pred:$p, reglist:$regs, variable_ops), 2, IIC_iPop_Br, [], - (tPOP pred:$p, reglist:$regs)>; + (tPOP pred:$p, reglist:$regs)>, Sched<[WriteBrL]>; // Indirect branch using "mov pc, $Rm" let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def tBRIND : tPseudoExpand<(outs), (ins GPR:$Rm, pred:$p), 2, IIC_Br, [(brind GPR:$Rm)], - (tMOVr PC, GPR:$Rm, pred:$p)>; + (tMOVr PC, GPR:$Rm, pred:$p)>, Sched<[WriteBr]>; } diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 4dacb86df4..ff21bf70ec 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -554,7 +554,8 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, def ri : T2sTwoRegImm< (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii, opc, "\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]> { + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>, + Sched<[WriteALU, ReadALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -563,7 +564,8 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, // register def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), iir, opc, !strconcat(wide, "\t$Rd, $Rn, $Rm"), - [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> { + [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>, + Sched<[WriteALU, ReadALU, ReadALU]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -576,7 +578,8 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, def rs : T2sTwoRegShiftedReg< (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), iis, opc, !strconcat(wide, "\t$Rd, $Rn, $ShiftedRm"), - [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]> { + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>, + Sched<[WriteALUsi, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -635,7 +638,8 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> 
{ def ri : T2sTwoRegImm< (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi, opc, ".w\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> { + [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]>, + Sched<[WriteALU, ReadALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -645,7 +649,8 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> { def rr : T2sThreeReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr, opc, "\t$Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]> { + [/* For disassembly only; pattern left blank */]>, + Sched<[WriteALU, ReadALU, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -657,7 +662,8 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> { def rs : T2sTwoRegShiftedReg< (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), IIC_iALUsir, opc, "\t$Rd, $Rn, $ShiftedRm", - [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> { + [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]>, + Sched<[WriteALUsi, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -678,12 +684,14 @@ multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir, (ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p), 4, iii, [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn, - t2_so_imm:$imm))]>; + t2_so_imm:$imm))]>, + Sched<[WriteALU, ReadALU]>; // register def rr : t2PseudoInst<(outs rGPR:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm, pred:$p), 4, iir, [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn, - rGPR:$Rm))]> { + rGPR:$Rm))]>, + Sched<[WriteALU, ReadALU, ReadALU]> { let isCommutable = Commutable; } // shifted register @@ -691,7 +699,8 @@ multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir, (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p), 4, iis, [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn, - t2_so_reg:$ShiftedRm))]>; + t2_so_reg:$ShiftedRm))]>, + Sched<[WriteALUsi, ReadALUsr]>; } } @@ -704,13 +713,15 @@ multiclass T2I_rbin_s_is<PatFrag opnode> { (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p), 4, IIC_iALUi, [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm, - rGPR:$Rn))]>; + rGPR:$Rn))]>, + Sched<[WriteALU, ReadALU]>; // shifted register def rs : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p), 4, IIC_iALUsi, [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm, - rGPR:$Rn))]>; + rGPR:$Rn))]>, + Sched<[WriteALUsi, ReadALU]>; } } @@ -725,7 +736,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, def ri : T2sTwoRegImm< (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi, opc, ".w\t$Rd, $Rn, $imm", - [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]> { + [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]>, + Sched<[WriteALU, ReadALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24} = 1; @@ -737,7 +749,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, def ri12 : T2I< (outs GPRnopc:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi, !strconcat(opc, "w"), "\t$Rd, $Rn, $imm", - [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> { + [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]>, + Sched<[WriteALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> imm; @@ -755,7 +768,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, // register def rr : T2sThreeReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm), IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm", - [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, rGPR:$Rm))]> { + 
[(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, rGPR:$Rm))]>, + Sched<[WriteALU, ReadALU, ReadALU]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -769,7 +783,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, def rs : T2sTwoRegShiftedReg< (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm", - [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]> { + [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]>, + Sched<[WriteALUsi, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24} = 1; @@ -787,7 +802,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi, opc, "\t$Rd, $Rn, $imm", [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_imm:$imm, CPSR))]>, - Requires<[IsThumb2]> { + Requires<[IsThumb2]>, Sched<[WriteALU, ReadALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -797,7 +812,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, rGPR:$Rm, CPSR))]>, - Requires<[IsThumb2]> { + Requires<[IsThumb2]>, Sched<[WriteALU, ReadALU, ReadALU]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -811,7 +826,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm", [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm, CPSR))]>, - Requires<[IsThumb2]> { + Requires<[IsThumb2]>, Sched<[WriteALUsi, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -826,7 +841,8 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> { def ri : T2sTwoRegShiftImm< (outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi, opc, ".w\t$Rd, $Rm, $imm", - [(set rGPR:$Rd, (opnode rGPR:$Rm, (i32 ty:$imm)))]> { + [(set rGPR:$Rd, (opnode rGPR:$Rm, (i32 ty:$imm)))]>, + Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-21} = 0b010010; let Inst{19-16} = 0b1111; // Rn @@ -836,7 +852,8 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> { def rr : T2sThreeReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMOVsr, opc, ".w\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> { + [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>, + Sched<[WriteALU]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-21} = opcod; @@ -880,7 +897,7 @@ let isCompare = 1, Defs = [CPSR] in { def ri : T2OneRegCmpImm< (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), iii, opc, ".w\t$Rn, $imm", - [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]> { + [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]>, Sched<[WriteCMP]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -892,7 +909,7 @@ let isCompare = 1, Defs = [CPSR] in { def rr : T2TwoRegCmp< (outs), (ins GPRnopc:$Rn, rGPR:$Rm), iir, opc, ".w\t$Rn, $Rm", - [(opnode GPRnopc:$Rn, rGPR:$Rm)]> { + [(opnode GPRnopc:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -906,7 +923,8 @@ let isCompare = 1, Defs = [CPSR] in { def rs : T2OneRegCmpShiftedReg< (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), iis, opc, ".w\t$Rn, $ShiftedRm", - [(opnode 
GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> { + [(opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]>, + Sched<[WriteCMPsi]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -1167,7 +1185,8 @@ class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin, // assembler. def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd), (ins t2adrlabel:$addr, pred:$p), - IIC_iALUi, "adr{$p}.w\t$Rd, $addr", []> { + IIC_iALUi, "adr{$p}.w\t$Rd, $addr", []>, + Sched<[WriteALU, ReadALU]> { let Inst{31-27} = 0b11110; let Inst{25-24} = 0b10; // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE) @@ -1190,12 +1209,12 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd), let neverHasSideEffects = 1, isReMaterializable = 1 in def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p), - 4, IIC_iALUi, []>; + 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; let hasSideEffects = 1 in def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 4, IIC_iALUi, - []>; + []>, Sched<[WriteALU, ReadALU]>; //===----------------------------------------------------------------------===// @@ -1520,7 +1539,8 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc, "\t$addr", - [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]> { + [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]>, + Sched<[WritePreLd]> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; let Inst{22} = 0; @@ -1537,7 +1557,8 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { def i8 : T2Ii8<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc, "\t$addr", - [(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]> { + [(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]>, + Sched<[WritePreLd]> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; let Inst{23} = 0; // U = 0 @@ -1554,7 +1575,8 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc, "\t$addr", - [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]> { + [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]>, + Sched<[WritePreLd]> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; let Inst{23} = 0; // add = TRUE for T1 @@ -1743,7 +1765,7 @@ defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>; let neverHasSideEffects = 1 in def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr, - "mov", ".w\t$Rd, $Rm", []> { + "mov", ".w\t$Rd, $Rm", []>, Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -1763,7 +1785,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, AddedComplexity = 1 in def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi, "mov", ".w\t$Rd, $imm", - [(set rGPR:$Rd, t2_so_imm:$imm)]> { + [(set rGPR:$Rd, t2_so_imm:$imm)]>, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = 0b0010; @@ -1786,7 +1808,7 @@ def : t2InstAlias<"mov${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm, let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi, "movw", "\t$Rd, $imm", - [(set rGPR:$Rd, imm0_65535:$imm)]> { + [(set rGPR:$Rd, imm0_65535:$imm)]>, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-21} = 0b0010; @@ -1812,7 +1834,8 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd), (ins 
rGPR:$src, imm0_65535_expr:$imm), IIC_iMOVi, "movt", "\t$Rd, $imm", [(set rGPR:$Rd, - (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> { + (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]>, + Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-21} = 0b0110; @@ -1831,7 +1854,8 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd), } def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), - (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; + (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, + Sched<[WriteALU]>; } // Constraints def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>; @@ -2171,7 +2195,7 @@ def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), let Uses = [CPSR] in { def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, "rrx", "\t$Rd, $Rm", - [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]> { + [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]>, Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -2185,7 +2209,8 @@ let isCodeGenOnly = 1, Defs = [CPSR] in { def t2MOVsrl_flag : T2TwoRegShiftImm< (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, "lsrs", ".w\t$Rd, $Rm, #1", - [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]> { + [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]>, + Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -2199,7 +2224,8 @@ def t2MOVsrl_flag : T2TwoRegShiftImm< def t2MOVsra_flag : T2TwoRegShiftImm< (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, "asrs", ".w\t$Rd, $Rm, #1", - [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]> { + [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]>, + Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -2320,7 +2346,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, // shifted imm def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii, opc, "\t$Rd, $imm", - [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> { + [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]>, Sched<[WriteALU]> { let isAsCheapAsAMove = Cheap; let isReMaterializable = ReMat; let isMoveImm = MoveImm; @@ -2333,7 +2359,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, // register def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir, opc, ".w\t$Rd, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rm))]> { + [(set rGPR:$Rd, (opnode rGPR:$Rm))]>, Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -2345,7 +2371,8 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, // shifted register def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis, opc, ".w\t$Rd, $ShiftedRm", - [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> { + [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]>, + Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -2804,22 +2831,27 @@ class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops, } def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, - "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>; + "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>, + Sched<[WriteALU]>; def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, "rbit", "\t$Rd, $Rm", - [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>; + [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>, + Sched<[WriteALU]>; def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, - "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>; + "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>, + 
Sched<[WriteALU]>; def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, "rev16", ".w\t$Rd, $Rm", - [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>; + [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>, + Sched<[WriteALU]>; def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, "revsh", ".w\t$Rd, $Rm", - [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>; + [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>, + Sched<[WriteALU]>; def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)), (and (srl rGPR:$Rm, (i32 8)), 0xFF)), @@ -2831,7 +2863,8 @@ def t2PKHBT : T2ThreeReg< [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF), (and (shl rGPR:$Rm, pkh_lsl_amt:$sh), 0xFFFF0000)))]>, - Requires<[HasT2ExtractPack, IsThumb2]> { + Requires<[HasT2ExtractPack, IsThumb2]>, + Sched<[WriteALUsi, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-20} = 0b01100; @@ -2859,7 +2892,8 @@ def t2PKHTB : T2ThreeReg< [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000), (and (sra rGPR:$Rm, pkh_asr_amt:$sh), 0xFFFF)))]>, - Requires<[HasT2ExtractPack, IsThumb2]> { + Requires<[HasT2ExtractPack, IsThumb2]>, + Sched<[WriteALUsi, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-20} = 0b01100; @@ -2900,7 +2934,8 @@ let isCompare = 1, Defs = [CPSR] in { def t2CMNri : T2OneRegCmpImm< (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iCMPi, "cmn", ".w\t$Rn, $imm", - [(ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm))]> { + [(ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm))]>, + Sched<[WriteCMP, ReadALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = 0b1000; @@ -2913,7 +2948,7 @@ let isCompare = 1, Defs = [CPSR] in { (outs), (ins GPRnopc:$Rn, rGPR:$Rm), IIC_iCMPr, "cmn", ".w\t$Rn, $Rm", [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPRnopc:$Rn, rGPR:$Rm)]> { + GPRnopc:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b1000; @@ -2928,7 +2963,8 @@ let isCompare = 1, Defs = [CPSR] in { (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), IIC_iCMPsi, "cmn", ".w\t$Rn, $ShiftedRm", [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> { + GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]>, + Sched<[WriteCMPsi, ReadALU, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b1000; @@ -2968,14 +3004,15 @@ def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm, pred:$p), 4, IIC_iCMOVr, [/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">; + RegConstraint<"$false = $Rd">, + Sched<[WriteALU]>; let isMoveImm = 1 in def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm, pred:$p), 4, IIC_iCMOVi, [/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">; + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; // FIXME: Pseudo-ize these. For now, just mark codegen only. 
let isCodeGenOnly = 1 in { @@ -2983,7 +3020,7 @@ let isMoveImm = 1 in def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, imm0_65535_expr:$imm), IIC_iCMOVi, "movw", "\t$Rd, $imm", []>, - RegConstraint<"$false = $Rd"> { + RegConstraint<"$false = $Rd">, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-21} = 0b0010; @@ -3010,7 +3047,7 @@ def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm), IIC_iCMOVi, "mvn", "\t$Rd, $imm", [/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd"> { + RegConstraint<"$false = $Rd">, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = 0b0011; @@ -3021,7 +3058,7 @@ def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm), class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : T2TwoRegShiftImm<oops, iops, itin, opc, asm, pattern> { + : T2TwoRegShiftImm<oops, iops, itin, opc, asm, pattern>, Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -3072,7 +3109,7 @@ def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, let Inst{3-0} = opt; } -def t2ISB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, +def t2ISB : AInoP<(outs), (ins instsyncb_opt:$opt), ThumbFrm, NoItinerary, "isb", "\t$opt", []>, Requires<[IsThumb, HasDB]> { bits<4> opt; @@ -3243,7 +3280,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, "b", ".w\t$target", - [(br bb:$target)]> { + [(br bb:$target)]>, Sched<[WriteBr]> { let Inst{31-27} = 0b11110; let Inst{15-14} = 0b10; let Inst{12} = 1; @@ -3261,17 +3298,20 @@ let isNotDuplicable = 1, isIndirectBranch = 1 in { def t2BR_JT : t2PseudoInst<(outs), (ins GPR:$target, GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, - [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>; + [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>, + Sched<[WriteBr]>; // FIXME: Add a non-pc based case that can be predicated. 
def t2TBB_JT : t2PseudoInst<(outs), - (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>; + (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>, + Sched<[WriteBr]>; def t2TBH_JT : t2PseudoInst<(outs), - (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>; + (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>, + Sched<[WriteBr]>; def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br, - "tbb", "\t$addr", []> { + "tbb", "\t$addr", []>, Sched<[WriteBrTbl]> { bits<4> Rn; bits<4> Rm; let Inst{31-20} = 0b111010001101; @@ -3284,7 +3324,7 @@ def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br, } def t2TBH : T2I<(outs), (ins addrmode_tbh:$addr), IIC_Br, - "tbh", "\t$addr", []> { + "tbh", "\t$addr", []>, Sched<[WriteBrTbl]> { bits<4> Rn; bits<4> Rm; let Inst{31-20} = 0b111010001101; @@ -3304,7 +3344,7 @@ def t2TBH : T2I<(outs), (ins addrmode_tbh:$addr), IIC_Br, let isBranch = 1, isTerminator = 1 in def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, "b", ".w\t$target", - [/*(ARMbrcond bb:$target, imm:$cc)*/]> { + [/*(ARMbrcond bb:$target, imm:$cc)*/]>, Sched<[WriteBr]> { let Inst{31-27} = 0b11110; let Inst{15-14} = 0b10; let Inst{12} = 0; @@ -3331,7 +3371,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { (ins uncondbrtarget:$dst, pred:$p), 4, IIC_Br, [], (t2B uncondbrtarget:$dst, pred:$p)>, - Requires<[IsThumb2, IsIOS]>; + Requires<[IsThumb2, IsIOS]>, Sched<[WriteBr]>; } // IT block @@ -3353,7 +3393,8 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), // Branch and Exchange Jazelle -- for disassembly only // Rm = Inst{19-16} -def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []> { +def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []>, + Sched<[WriteBr]> { bits<4> func; let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -3367,7 +3408,7 @@ let isBranch = 1, isTerminator = 1 in { def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br, "cbz\t$Rn, $target", []>, T1Misc<{0,0,?,1,?,?,?}>, - Requires<[IsThumb2]> { + Requires<[IsThumb2]>, Sched<[WriteBr]> { // A8.6.27 bits<6> target; bits<3> Rn; @@ -3379,7 +3420,7 @@ let isBranch = 1, isTerminator = 1 in { def tCBNZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br, "cbnz\t$Rn, $target", []>, T1Misc<{1,0,?,1,?,?,?}>, - Requires<[IsThumb2]> { + Requires<[IsThumb2]>, Sched<[WriteBr]> { // A8.6.27 bits<6> target; bits<3> Rn; @@ -3981,7 +4022,7 @@ def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm", // Aliases for ADD without the ".w" optional width specifier. def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", - (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; + (t2ADDri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"add${p} $Rd, $Rn, $imm", (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm", diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index b5a896c699..597b74a0c7 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -224,7 +224,36 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r", defm : VFPDTAnyInstAlias<"vpop${p}", "$r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>; -// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores +// FLDMX, FSTMX - Load and store multiple unknown precision registers for +// pre-armv6 cores. +// These instructions are deprecated so we don't want them to get selected.
+multiclass vfp_ldstx_mult<string asm, bit L_bit> { + // Unknown precision + def XIA : + AXXI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), + IndexModeNone, !strconcat(asm, "iax${p}\t$Rn, $regs"), "", []> { + let Inst{24-23} = 0b01; // Increment After + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + } + def XIA_UPD : + AXXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), + IndexModeUpd, !strconcat(asm, "iax${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + let Inst{24-23} = 0b01; // Increment After + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + } + def XDB_UPD : + AXXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), + IndexModeUpd, !strconcat(asm, "dbx${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + let Inst{24-23} = 0b10; // Decrement Before + let Inst{21} = 1; + let Inst{20} = L_bit; + } +} + +defm FLDM : vfp_ldstx_mult<"fldm", 1>; +defm FSTM : vfp_ldstx_mult<"fstm", 0>; //===----------------------------------------------------------------------===// // FP Binary Operations. @@ -841,7 +870,8 @@ let Constraints = "$a = $dst" in { class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> { + : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>, + Sched<[WriteCvtFP]> { bits<5> dst; // if dp_operation then UInt(D:Vd) else UInt(Vd:D); let Inst{22} = dst{0}; @@ -852,7 +882,8 @@ class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> { + : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>, + Sched<[WriteCvtFP]> { bits<5> dst; // if dp_operation then UInt(D:Vd) else UInt(Vd:D); let Inst{22} = dst{4}; @@ -1300,6 +1331,10 @@ let Uses = [FPSCR] in { "vmrs", "\t$Rt, mvfr0", []>; def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins), "vmrs", "\t$Rt, mvfr1", []>; + def VMRS_FPINST : MovFromVFP<0b1001 /* fpinst */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, fpinst", []>; + def VMRS_FPINST2 : MovFromVFP<0b1010 /* fpinst2 */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, fpinst2", []>; } //===----------------------------------------------------------------------===// @@ -1333,6 +1368,11 @@ let Defs = [FPSCR] in { // System level GPR -> FPSID def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPR:$src), "vmsr", "\tfpsid, $src", []>; + + def VMSR_FPINST : MovToVFP<0b1001 /* fpinst */, (outs), (ins GPR:$src), + "vmsr", "\tfpinst, $src", []>; + def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPR:$src), + "vmsr", "\tfpinst2, $src", []>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index f4248fcfcc..d9ec4fd221 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -36,6 +36,13 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// 'isThumb'. 
bool hasThumb2; + /// StByValParamsPadding - For a parameter that is split between + /// GPRs and memory: when recovering the GPR part, if StackAlignment == 8 + /// and the size of the GPR part mod 8 != 0, we need to insert a gap before + /// the parameter's start address. This lets us "attach" the GPR part to the + /// part that was passed via the stack. + unsigned StByValParamsPadding; + /// VarArgsRegSaveSize - Size of the register save area for vararg functions. /// unsigned ArgRegsSaveSize; @@ -129,6 +136,7 @@ public: explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), + StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), @@ -141,7 +149,14 @@ public: bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } bool isThumb2Function() const { return isThumb && hasThumb2; } - unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; } + unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; } + void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; } + + unsigned getArgRegsSaveSize(unsigned Align = 0) const { + if (!Align) + return ArgRegsSaveSize; + return (ArgRegsSaveSize + Align - 1) & ~(Align - 1); + } void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; } bool hasStackFrame() const { return HasStackFrame; } diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index 6f3819afd0..a7880364d8 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -18,7 +18,6 @@ using namespace llvm; void ARMRegisterInfo::anchor() { } -ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii, - const ARMSubtarget &sti) - : ARMBaseRegisterInfo(tii, sti) { +ARMRegisterInfo::ARMRegisterInfo(const ARMSubtarget &sti) + : ARMBaseRegisterInfo(sti) { } diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index 8a248425c3..fb1537cf94 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -19,13 +19,13 @@ #include "llvm/Target/TargetRegisterInfo.h" namespace llvm { - class ARMSubtarget; - class ARMBaseInstrInfo; + +class ARMSubtarget; struct ARMRegisterInfo : public ARMBaseRegisterInfo { virtual void anchor(); public: - ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); + ARMRegisterInfo(const ARMSubtarget &STI); }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index b0f576bc2b..0459d645c4 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -27,31 +27,31 @@ class ARMFReg<bits<16> Enc, string n> : Register<n> { // Subregister indices. let Namespace = "ARM" in { -def qqsub_0 : SubRegIndex; -def qqsub_1 : SubRegIndex; +def qqsub_0 : SubRegIndex<256>; +def qqsub_1 : SubRegIndex<256, 256>; // Note: Code depends on these having consecutive numbers.
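An aside on the getArgRegsSaveSize(Align) overload in the ARMMachineFunctionInfo.h hunk above: it rounds the save-area size up to the next multiple of a power-of-two alignment with the usual mask trick. A minimal standalone C++ sketch (not LLVM code; alignUp is an illustrative name) of that computation:

    #include <cassert>

    // Round Size up to the next multiple of Align. Align must be a power of
    // two, which holds for stack alignments such as 4 and 8.
    static unsigned alignUp(unsigned Size, unsigned Align) {
      return (Size + Align - 1) & ~(Align - 1);
    }

    int main() {
      assert(alignUp(4, 8) == 8);   // a 4-byte GPR save area padded out to 8
      assert(alignUp(8, 8) == 8);   // already aligned, unchanged
      assert(alignUp(12, 8) == 16); // size mod 8 != 0, so a gap is inserted
      return 0;
    }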
-def qsub_0 : SubRegIndex; -def qsub_1 : SubRegIndex; -def qsub_2 : SubRegIndex<[qqsub_1, qsub_0]>; -def qsub_3 : SubRegIndex<[qqsub_1, qsub_1]>; - -def dsub_0 : SubRegIndex; -def dsub_1 : SubRegIndex; -def dsub_2 : SubRegIndex<[qsub_1, dsub_0]>; -def dsub_3 : SubRegIndex<[qsub_1, dsub_1]>; -def dsub_4 : SubRegIndex<[qsub_2, dsub_0]>; -def dsub_5 : SubRegIndex<[qsub_2, dsub_1]>; -def dsub_6 : SubRegIndex<[qsub_3, dsub_0]>; -def dsub_7 : SubRegIndex<[qsub_3, dsub_1]>; - -def ssub_0 : SubRegIndex; -def ssub_1 : SubRegIndex; -def ssub_2 : SubRegIndex<[dsub_1, ssub_0]>; -def ssub_3 : SubRegIndex<[dsub_1, ssub_1]>; - -def gsub_0 : SubRegIndex; -def gsub_1 : SubRegIndex; +def qsub_0 : SubRegIndex<128>; +def qsub_1 : SubRegIndex<128, 128>; +def qsub_2 : ComposedSubRegIndex<qqsub_1, qsub_0>; +def qsub_3 : ComposedSubRegIndex<qqsub_1, qsub_1>; + +def dsub_0 : SubRegIndex<64>; +def dsub_1 : SubRegIndex<64, 64>; +def dsub_2 : ComposedSubRegIndex<qsub_1, dsub_0>; +def dsub_3 : ComposedSubRegIndex<qsub_1, dsub_1>; +def dsub_4 : ComposedSubRegIndex<qsub_2, dsub_0>; +def dsub_5 : ComposedSubRegIndex<qsub_2, dsub_1>; +def dsub_6 : ComposedSubRegIndex<qsub_3, dsub_0>; +def dsub_7 : ComposedSubRegIndex<qsub_3, dsub_1>; + +def ssub_0 : SubRegIndex<32>; +def ssub_1 : SubRegIndex<32, 32>; +def ssub_2 : ComposedSubRegIndex<dsub_1, ssub_0>; +def ssub_3 : ComposedSubRegIndex<dsub_1, ssub_1>; + +def gsub_0 : SubRegIndex<32>; +def gsub_1 : SubRegIndex<32, 32>; // Let TableGen synthesize the remaining 12 ssub_* indices. // We don't need to name them. } @@ -157,21 +157,26 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>; // Current Program Status Register. // We model fpscr with two registers: FPSCR models the control bits and will be -// reserved. FPSCR_NZCV models the flag bits and will be unreserved. -def CPSR : ARMReg<0, "cpsr">; -def APSR : ARMReg<1, "apsr">; -def SPSR : ARMReg<2, "spsr">; -def FPSCR : ARMReg<3, "fpscr">; -def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> { +// reserved. FPSCR_NZCV models the flag bits and will be unreserved. APSR_NZCV +// models the APSR when it's accessed by some special instructions. In such cases +// it has the same encoding as PC. +def CPSR : ARMReg<0, "cpsr">; +def APSR : ARMReg<1, "apsr">; +def APSR_NZCV : ARMReg<15, "apsr_nzcv">; +def SPSR : ARMReg<2, "spsr">; +def FPSCR : ARMReg<3, "fpscr">; +def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> { let Aliases = [FPSCR]; } def ITSTATE : ARMReg<4, "itstate">; // Special Registers - only available in privileged mode. -def FPSID : ARMReg<0, "fpsid">; -def MVFR1 : ARMReg<6, "mvfr1">; -def MVFR0 : ARMReg<7, "mvfr0">; -def FPEXC : ARMReg<8, "fpexc">; +def FPSID : ARMReg<0, "fpsid">; +def MVFR1 : ARMReg<6, "mvfr1">; +def MVFR0 : ARMReg<7, "mvfr0">; +def FPEXC : ARMReg<8, "fpexc">; +def FPINST : ARMReg<9, "fpinst">; +def FPINST2 : ARMReg<10, "fpinst2">; // Register classes. // @@ -207,6 +212,16 @@ def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> { }]; } +// GPRs without the PC but with APSR. Some instructions allow accessing the +// APSR, while actually encoding PC in the register field. This is useful +// for assembly and disassembly only. +def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add GPR, APSR_NZCV)> { + let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)]; + let AltOrderSelect = [{ + return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); + }]; +} + // GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the // implied SP argument list.
// FIXME: It would be better to not use this at all and refactor the diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 2d088de96e..528c4ec737 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -69,6 +69,24 @@ def WriteCMP : SchedWrite; def WriteCMPsi : SchedWrite; def WriteCMPsr : SchedWrite; +// Division. +def WriteDiv : SchedWrite; + +// Loads. +def WriteLd : SchedWrite; +def WritePreLd : SchedWrite; + +// Branches. +def WriteBr : SchedWrite; +def WriteBrL : SchedWrite; +def WriteBrTbl : SchedWrite; + +// Fixpoint conversions. +def WriteCvtFP : SchedWrite; + +// Noop. +def WriteNoop : SchedWrite; + // Define TII for use in SchedVariant Predicates. def : PredicateProlog<[{ const ARMBaseInstrInfo *TII = diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 9739ed20ce..d06ad7d669 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -2275,10 +2275,10 @@ def A9Read4 : SchedReadAdvance<3>; // This table follows the ARM Cortex-A9 Technical Reference Manuals, // mostly in order. -def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi, +def :ItinRW<[WriteALU], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi, IIC_iMVNi,IIC_iMVNsi, IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>; -def :ItinRW<[A9WriteI,A9ReadALU],[IIC_iMVNr]>; +def :ItinRW<[WriteALU, A9ReadALU],[IIC_iMVNr]>; def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>; def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>; @@ -2487,10 +2487,59 @@ def : SchedAlias<WriteALUsr, A9WriteALUsr>; def : SchedAlias<WriteALUSsr, A9WriteALUsr>; def : SchedAlias<ReadALU, A9ReadALU>; def : SchedAlias<ReadALUsr, A9ReadALU>; -// FIXME: need to special case AND, ORR, EOR, BIC because they don't read -// advance. But our instrinfo claims it does. +def : InstRW< [WriteALU], + (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr", + "BICrr")>; +def : InstRW< [WriteALUsi], (instregex "ANDrsi", "ORRrsi", "EORrsi", "BICrsi")>; +def : InstRW< [WriteALUsr], (instregex "ANDrsr", "ORRrsr", "EORrsr", "BICrsr")>; + def : SchedAlias<WriteCMP, A9WriteALU>; def : SchedAlias<WriteCMPsi, A9WriteALU>; def : SchedAlias<WriteCMPsr, A9WriteALU>; + +def : InstRW< [A9WriteIsr], (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi", + "MOVCCsr")>; +def : InstRW< [WriteALU, A9ReadALU], (instregex "MVNr")>; +def : InstRW< [A9WriteI2], (instregex "MOVCCi32imm", "MOVi32imm", + "MOV_ga_dyn")>; +def : InstRW< [A9WriteI2pc], (instregex "MOV_ga_pcrel")>; +def : InstRW< [A9WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>; + +def : InstRW< [WriteALU], (instregex "SEL")>; + +def : InstRW< [WriteALUsi], (instregex "BFC", "BFI", "UBFX", "SBFX")>; + +def : InstRW< [A9WriteM], + (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5", "MLS", + "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>; +def : InstRW< [A9WriteM, A9WriteMHi], + (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$", "UMLAL", + "UMAAL", "SMLALv5", "UMLALv5", "UMAALv5", "SMLALBB", "SMLALBT", "SMLALTB", + "SMLALTT")>; +// FIXME: These instructions used to have NoItinerary. Just copied the one from above. 
+def : InstRW< [A9WriteM, A9WriteMHi], + (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX", + "SMLSLD", "SMLLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>; + +def : InstRW<[A9WriteM16, A9WriteM16Hi], + (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>; +def : InstRW<[A9WriteM16, A9WriteM16Hi], + (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLAWB", "SMLAWT")>; + +def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>; +def : InstRW<[A9WriteLsi], (instregex "LDRrs")>; +def : InstRW<[A9WriteLb], + (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH", "PICLDRSB", + "LDRH", "LDRSH", "LDRSB")>; +def : InstRW<[A9WriteLbsi], (instregex "LDRrs")>; + +def : WriteRes<WriteDiv, []> { let Latency = 0; } + +def : WriteRes<WriteBr, [A9UnitB]>; +def : WriteRes<WriteBrL, [A9UnitB]>; +def : WriteRes<WriteBrTbl, [A9UnitB]>; +def : WriteRes<WritePreLd, []>; +def : SchedAlias<WriteCvtFP, A9WriteF>; +def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } } // SchedModel = CortexA9Model diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td index 7c6df41070..b5cf2518c0 100644 --- a/lib/Target/ARM/ARMScheduleSwift.td +++ b/lib/Target/ARM/ARMScheduleSwift.td @@ -1096,9 +1096,27 @@ let SchedModel = SwiftModel in { def SwiftUnitDiv : ProcResource<1>; // Generic resource requirements. + def SwiftWriteP0OneCycle : SchedWriteRes<[SwiftUnitP0]>; + def SwiftWriteP0TwoCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 2; } + def SwiftWriteP0FourCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 4; } + def SwiftWriteP0SixCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 6; } + def SwiftWriteP0P1FourCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> { + let Latency = 4; + } + def SwiftWriteP0P1SixCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> { + let Latency = 6; + } + def SwiftWriteP01OneCycle : SchedWriteRes<[SwiftUnitP01]>; + def SwiftWriteP1TwoCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 2; } + def SwiftWriteP1FourCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 4; } + def SwiftWriteP1SixCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 6; } + def SwiftWriteP1EightCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 8; } + def SwiftWriteP1TwelveCyc : SchedWriteRes<[SwiftUnitP1]> { let Latency = 12; } + def SwiftWriteP01OneCycle2x : WriteSequence<[SwiftWriteP01OneCycle], 2>; + def SwiftWriteP01OneCycle3x : WriteSequence<[SwiftWriteP01OneCycle], 3>; def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; } - def SwiftWriteP01ThreeCycleTwoUops : - SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]> { + def SwiftWriteP01ThreeCycleTwoUops : SchedWriteRes<[SwiftUnitP01, + SwiftUnitP01]> { let Latency = 3; let NumMicroOps = 2; } @@ -1107,7 +1125,23 @@ let SchedModel = SwiftModel in { let NumMicroOps = 3; let ResourceCycles = [3]; } - + // Plain load without writeback. + def SwiftWriteP2ThreeCycle : SchedWriteRes<[SwiftUnitP2]> { + let Latency = 3; + } + def SwiftWriteP2FourCycle : SchedWriteRes<[SwiftUnitP2]> { + let Latency = 4; + } + // A store does not write to a register. + def SwiftWriteP2 : SchedWriteRes<[SwiftUnitP2]> { + let Latency = 0; + } + foreach Num = 1-4 in { + def SwiftWrite#Num#xP2 : WriteSequence<[SwiftWriteP2], Num>; + } + def SwiftWriteP01OneCycle2x_load : WriteSequence<[SwiftWriteP01OneCycle, + SwiftWriteP01OneCycle, + SwiftWriteP2ThreeCycle]>; // 4.2.4 Arithmetic and Logical. // ALU operation register shifted by immediate variant. 
def SwiftWriteALUsi : SchedWriteVariant<[ @@ -1137,8 +1171,897 @@ let SchedModel = SwiftModel in { def : ReadAdvance<ReadALU, 0>; def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>; + + def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[ + SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01OneCycle]>, + SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]> + ]>; + // 4.2.5 Integer comparison def : WriteRes<WriteCMP, [SwiftUnitP01]>; - def : WriteRes<WriteCMPsi, [SwiftUnitP01]>; - def : WriteRes<WriteCMPsr, [SwiftUnitP01]>; + def : SchedAlias<WriteCMPsi, SwiftChooseShiftKindP01OneOrTwoCycle>; + def : SchedAlias<WriteCMPsr, SwiftWriteP01TwoCycle>; + + // 4.2.6 Shift, Move + // Shift + // ASR,LSL,ROR,RRX + // MOV(register-shiftedregister) MVN(register-shiftedregister) + // Move + // MOV,MVN + // MOVT + // Sign/Zero extension + def : InstRW<[SwiftWriteP01OneCycle], + (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16", + "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", + "t2UXTB16")>; + // Pseudo instructions. + def : InstRW<[SwiftWriteP01OneCycle2x], + (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi32imm", + "t2MOVi32imm", "t2MOV_ga_dyn")>; + def : InstRW<[SwiftWriteP01OneCycle3x], + (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel", "t2MOVi16_ga_pcrel")>; + def : InstRW<[SwiftWriteP01OneCycle2x_load], + (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>; + + def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>; + + def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>, + SchedVar<NoSchedPred, [ SwiftWriteP0OneCycle ]> + ]>; + + // 4.2.7 Select + // SEL + def : InstRW<[SwiftPredP0OneOrTwoCycle], (instregex "SEL", "t2SEL")>; + + // 4.2.8 Bitfield + // BFI,BFC, SBFX,UBFX + def : InstRW< [SwiftWriteP01TwoCycle], + (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI", + "(t|t2)UBFX", "(t|t2)SBFX")>; + + // 4.2.9 Saturating arithmetic + def : InstRW< [SwiftWriteP01TwoCycle], + (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT", + "USAT16", "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX", + "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD", + "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT", + "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX", + "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX")>; + + // 4.2.10 Parallel Arithmetic + // Not flag setting. + def : InstRW< [SwiftWriteALUsr], + (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX", + "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8", + "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8", + "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>; + // Flag setting. + def : InstRW< [SwiftWriteP01TwoCycle], + (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX", + "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16", + "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16", + "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16", + "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX", + "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>; + + // 4.2.11 Sum of Absolute Difference + def : InstRW< [SwiftWriteP0P1FourCycle], (instregex "USAD8") >; + def : InstRW<[SwiftWriteP0P1FourCycle, ReadALU, ReadALU, SchedReadAdvance<2>], + (instregex "USADA8")>; + + // 4.2.12 Integer Multiply (32-bit result) + // Two sources. 
+ def : InstRW< [SwiftWriteP0FourCycle], + (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT", + "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL", + "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT", + "t2SMULWB", "t2SMULWT", "t2SMUSD")>; + + def SwiftWriteP0P01FiveCycleTwoUops : + SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> { + let Latency = 5; + } + + def SwiftPredP0P01FourFiveCycle : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [ SwiftWriteP0P01FiveCycleTwoUops ]>, + SchedVar<NoSchedPred, [ SwiftWriteP0FourCycle ]> + ]>; + + def SwiftReadAdvanceFourCyclesPred : SchedReadVariant<[ + SchedVar<IsPredicatedPred, [SchedReadAdvance<4>]>, + SchedVar<NoSchedPred, [ReadALU]> + ]>; + + // Multiply accumulate, three sources + def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU, + SwiftReadAdvanceFourCyclesPred], + (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR", + "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", + "t2SMMLSR")>; + + // 4.2.13 Integer Multiply (32-bit result, Q flag) + def : InstRW< [SwiftWriteP0FourCycle], + (instregex "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX")>; + def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU, + SwiftReadAdvanceFourCyclesPred], + (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX", + "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT", + "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT")>; + def : InstRW< [SwiftPredP0P01FourFiveCycle], + (instregex "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX")>; + + def SwiftP0P0P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; + } + def SwiftWrite1Cycle : SchedWriteRes<[]> { + let Latency = 1; + let NumMicroOps = 0; + } + def SwiftWrite5Cycle : SchedWriteRes<[]> { + let Latency = 5; + let NumMicroOps = 0; + } + def SwiftWrite6Cycle : SchedWriteRes<[]> { + let Latency = 6; + let NumMicroOps = 0; + } + + // 4.2.14 Integer Multiply, Long + def : InstRW< [SwiftP0P0P01FiveCycle, SwiftWrite5Cycle], + (instregex "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$")>; + + def Swift2P03P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> { + let Latency = 7; + let NumMicroOps = 5; + let ResourceCycles = [2, 3]; + } + + // 4.2.15 Integer Multiply Accumulate, Long + // 4.2.16 Integer Multiply Accumulate, Dual + // 4.2.17 Integer Multiply Accumulate Accumulate, Long + // We are being a bit inaccurate here. + def : InstRW< [SwiftWrite5Cycle, Swift2P03P01FiveCycle, ReadALU, ReadALU, + SchedReadAdvance<4>, SchedReadAdvance<3>], + (instregex "SMLALS", "UMLALS", "SMLAL", "UMLAL", "MLALBB", "SMLALBT", + "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX", + "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2MLALBB", "t2SMLALBT", + "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX", "t2SMLSLD", "t2SMLSLDX", + "t2UMAAL")>; + + def SwiftDiv : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> { + let NumMicroOps = 1; + let Latency = 14; + let ResourceCycles = [1, 14]; + } + // 4.2.18 Integer Divide + def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround. 
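A note on the SwiftDiv entry above, which the InstRW line just below applies to SDIV/UDIV: a SchedWriteRes with ResourceCycles = [1, 14] over [SwiftUnitP0, SwiftUnitDiv] keeps the single divide unit reserved for 14 cycles per divide, so independent divides can begin at most once every 14 cycles even though P0 frees up after one cycle. A minimal standalone C++ sketch (not LLVM code; names are illustrative) of that throughput limit:

    #include <cassert>

    // With a single divider held for DivBusy cycles per operation, a divide
    // issued at cycle C blocks the next divide until cycle C + DivBusy.
    static unsigned earliestNextDivide(unsigned IssueCycle) {
      const unsigned DivBusy = 14; // ResourceCycles for SwiftUnitDiv above
      return IssueCycle + DivBusy;
    }

    int main() {
      // A divide issued at cycle 0 occupies the divider until cycle 14.
      assert(earliestNextDivide(0) == 14);
      return 0;
    }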
+ def : InstRW <[SwiftDiv], + (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>; + + // 4.2.19 Integer Load Single Element + // 4.2.20 Integer Load Signextended + def SwiftWriteP2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> { + let Latency = 3; + } + def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> { + let Latency = 4; + } + def SwiftWriteP2P01P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01, + SwiftUnitP01]> { + let Latency = 4; + } + def SwiftWriteP2P2ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2]> { + let Latency = 3; + } + def SwiftWriteP2P2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2, + SwiftUnitP01]> { + let Latency = 3; + } + def SwiftWrBackOne : SchedWriteRes<[]> { + let Latency = 1; + let NumMicroOps = 0; + } + def SwiftWriteLdFour : SchedWriteRes<[]> { + let Latency = 4; + let NumMicroOps = 0; + } + // Not accurate. + def : InstRW<[SwiftWriteP2ThreeCycle], + (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)", + "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "tLDR[BH](r|i|spi|pci|pciASM)", + "tLDR(r|i|spi|pci|pciASM)")>; + def : InstRW<[SwiftWriteP2ThreeCycle], + (instregex "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>; + def : InstRW<[SwiftWriteP2P01FourCyle], + (instregex "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$", + "t2LDRpci_pic", "tLDRS(B|H)")>; + def : InstRW<[SwiftWriteP2P01ThreeCycle, SwiftWrBackOne], + (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)", + "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)", + "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T")>; + def : InstRW<[SwiftWriteP2P01P01FourCycle, SwiftWrBackOne], + (instregex "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)", + "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T")>; + + // 4.2.21 Integer Dual Load + // Not accurate. + def : InstRW<[SwiftWriteP2P2ThreeCycle, SwiftWriteLdFour], + (instregex "t2LDRDi8", "LDRD$")>; + def : InstRW<[SwiftWriteP2P2P01ThreeCycle, SwiftWriteLdFour, SwiftWrBackOne], + (instregex "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>; + + // 4.2.22 Integer Load, Multiple + // NumReg = 1 .. 16 + foreach Lat = 3-25 in { + def SwiftWriteLM#Lat#Cy : SchedWriteRes<[SwiftUnitP2]> { + let Latency = Lat; + } + def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> { let Latency = Lat; } + } + // Predicate. 
+ foreach NumAddr = 1-16 in { + def SwiftLMAddr#NumAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>; + } + def SwiftWriteLDMAddrNoWB : SchedWriteRes<[SwiftUnitP01]> { let Latency = 0; } + def SwiftWriteLDMAddrWB : SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]>; + def SwiftWriteLM : SchedWriteVariant<[ + SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy]>, + SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy]>, + SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy]>, + SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy]>, + SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy]>, + SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy]>, + SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy]>, + SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy]>, + SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy, SwiftWriteLM12Cy]>, + SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy, SwiftWriteLM12Cy, + SwiftWriteLM13Cy]>, + SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy, SwiftWriteLM12Cy, + SwiftWriteLM13Cy, SwiftWriteLM14Cy]>, + SchedVar<SwiftLMAddr13Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy, SwiftWriteLM12Cy, + SwiftWriteLM13Cy, SwiftWriteLM14Cy, + SwiftWriteLM15Cy]>, + SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy, SwiftWriteLM12Cy, + SwiftWriteLM13Cy, SwiftWriteLM14Cy, + SwiftWriteLM15Cy, SwiftWriteLM16Cy]>, + SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy, SwiftWriteLM12Cy, + SwiftWriteLM13Cy, SwiftWriteLM14Cy, + SwiftWriteLM15Cy, SwiftWriteLM16Cy, + SwiftWriteLM17Cy]>, + SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy, SwiftWriteLM12Cy, + SwiftWriteLM13Cy, SwiftWriteLM14Cy, + SwiftWriteLM15Cy, SwiftWriteLM16Cy, + SwiftWriteLM17Cy, SwiftWriteLM18Cy]>, + // Unknown number of registers, just use resources for two registers.
+ SchedVar<NoSchedPred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5CyNo, SwiftWriteLM6CyNo, + SwiftWriteLM7CyNo, SwiftWriteLM8CyNo, + SwiftWriteLM9CyNo, SwiftWriteLM10CyNo, + SwiftWriteLM11CyNo, SwiftWriteLM12CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM14CyNo, + SwiftWriteLM15CyNo, SwiftWriteLM16CyNo, + SwiftWriteLM17CyNo, SwiftWriteLM18CyNo]> + + ]> { let Variadic=1; } + + def : InstRW<[SwiftWriteLM, SwiftWriteLDMAddrNoWB], + (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$", + "(t|sys)LDM(IA|DA|DB|IB)$")>; + def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM], + (instregex /*"t2LDMIA_RET", "tLDMIA_RET", "LDMIA_RET",*/ + "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>; + def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM, SwiftWriteP1TwoCycle], + (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>; + // 4.2.23 Integer Store, Single Element + def : InstRW<[SwiftWriteP2], + (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", + "t2STR(i12|i8|s)$", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>; + + def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2], + (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)", + "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)", + "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)", + "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>; + + // 4.2.24 Integer Store, Dual + def : InstRW<[SwiftWriteP2, SwiftWriteP2, SwiftWriteP01OneCycle], + (instregex "STRD$", "t2STRDi8")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2, SwiftWriteP2, + SwiftWriteP01OneCycle], + (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>; + + // 4.2.25 Integer Store, Multiple + def SwiftWriteStIncAddr : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> { + let Latency = 0; + } + foreach NumAddr = 1-16 in { + def SwiftWriteSTM#NumAddr : WriteSequence<[SwiftWriteStIncAddr], NumAddr>; + } + def SwiftWriteSTM : SchedWriteVariant<[ + SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM2]>, + SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM3]>, + SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM4]>, + SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM5]>, + SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM6]>, + SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM7]>, + SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM8]>, + SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM9]>, + SchedVar<SwiftLMAddr10Pred,[SwiftWriteSTM10]>, + SchedVar<SwiftLMAddr11Pred,[SwiftWriteSTM11]>, + SchedVar<SwiftLMAddr12Pred,[SwiftWriteSTM12]>, + SchedVar<SwiftLMAddr13Pred,[SwiftWriteSTM13]>, + SchedVar<SwiftLMAddr14Pred,[SwiftWriteSTM14]>, + SchedVar<SwiftLMAddr15Pred,[SwiftWriteSTM15]>, + SchedVar<SwiftLMAddr16Pred,[SwiftWriteSTM16]>, + // Unknown number of registers, just use resources for two registers.
+ SchedVar<NoSchedPred, [SwiftWriteSTM2]> + ]>; + def : InstRW<[SwiftWriteSTM], + (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteSTM], + (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD", + "PUSH", "tPUSH")>; + + // 4.2.26 Branch + def : WriteRes<WriteBr, [SwiftUnitP1]> { let Latency = 0; } + def : WriteRes<WriteBrL, [SwiftUnitP1]> { let Latency = 2; } + def : WriteRes<WriteBrTbl, [SwiftUnitP1, SwiftUnitP2]> { let Latency = 0; } + + // 4.2.27 Not issued + def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } + def : InstRW<[WriteNoop], (instregex "t2IT", "IT", "NOP")>; + + // 4.2.28 Advanced SIMD, Integer, 2 cycle + def : InstRW<[SwiftWriteP0TwoCycle], + (instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL", + "VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi", + "VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST", + "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL", "VQSHLU", "VBIF", + "VBIT", "VBSL", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>; + + def : InstRW<[SwiftWriteP1TwoCycle], + (instregex "VEXT", "VREV16", "VREV32", "VREV64")>; + + // 4.2.29 Advanced SIMD, Integer, 4 cycle + // 4.2.30 Advanced SIMD, Integer with Accumulate + def : InstRW<[SwiftWriteP0FourCycle], + (instregex "VABA", "VABAL", "VPADAL", "VRSRA", "VSRA", "VACGE", "VACGT", + "VACLE", "VACLT", "VCEQ", "VCGE", "VCGT", "VCLE", "VCLT", "VRSHL", + "VQRSHL", "VRSHR(u|s)", "VABS(f|v)", "VQABS", "VQNEG", "VQADD", + "VQSUB")>; + def : InstRW<[SwiftWriteP1FourCycle], + (instregex "VRECPE", "VRSQRTE")>; + + // 4.2.31 Advanced SIMD, Add and Shift with Narrow + def : InstRW<[SwiftWriteP0P1FourCycle], + (instregex "VADDHN", "VSUBHN", "VSHRN")>; + def : InstRW<[SwiftWriteP0P1SixCycle], + (instregex "VRADDHN", "VRSUBHN", "VRSHRN", "VQSHRN", "VQSHRUN", + "VQRSHRN", "VQRSHRUN")>; + + // 4.2.32 Advanced SIMD, Vector Table Lookup + foreach Num = 1-4 in { + def SwiftWrite#Num#xP1TwoCycle : WriteSequence<[SwiftWriteP1TwoCycle], Num>; + } + def : InstRW<[SwiftWrite1xP1TwoCycle], + (instregex "VTB(L|X)1")>; + def : InstRW<[SwiftWrite2xP1TwoCycle], + (instregex "VTB(L|X)2")>; + def : InstRW<[SwiftWrite3xP1TwoCycle], + (instregex "VTB(L|X)3")>; + def : InstRW<[SwiftWrite4xP1TwoCycle], + (instregex "VTB(L|X)4")>; + + // 4.2.33 Advanced SIMD, Transpose + def : InstRW<[SwiftWriteP1FourCycle, SwiftWriteP1FourCycle, + SwiftWriteP1TwoCycle/*RsrcOnly*/, SchedReadAdvance<2>], + (instregex "VSWP", "VTRN", "VUZP", "VZIP")>; + + // 4.2.34 Advanced SIMD and VFP, Floating Point + def : InstRW<[SwiftWriteP0TwoCycle], (instregex "VABS(S|D)$", "VNEG(S|D)$")>; + def : InstRW<[SwiftWriteP0FourCycle], + (instregex "VCMP(D|S|ZD|ZS)$", "VCMPE(D|S|ZD|ZS)")>; + def : InstRW<[SwiftWriteP0FourCycle], + (instregex "VADD(S|f)", "VSUB(S|f)", "VABD", "VPADDf", "VMAX", "VMIN", "VPMAX", + "VPMIN")>; + def : InstRW<[SwiftWriteP0SixCycle], (instregex "VADDD$", "VSUBD$")>; + def : InstRW<[SwiftWriteP1EightCycle], (instregex "VRECPS", "VRSQRTS")>; + + // 4.2.35 Advanced SIMD and VFP, Multiply + def : InstRW<[SwiftWriteP1FourCycle], + (instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH", + "VMULL", "VQDMULL")>; + def : InstRW<[SwiftWriteP1SixCycle], + (instregex "VMULD", "VNMULD")>; + def : InstRW<[SwiftWriteP1FourCycle], + (instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)", + "VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>; + def : InstRW<[SwiftWriteP1EightCycle], (instregex "VFMAfd", "VFMSfd")>; + def : InstRW<[SwiftWriteP1TwelveCyc], 
(instregex "VFMAfq", "VFMSfq")>; + + // 4.2.36 Advanced SIMD and VFP, Convert + def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>; + // Fixpoint conversions. + def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; } + + // 4.2.37 Advanced SIMD and VFP, Move + def : InstRW<[SwiftWriteP0TwoCycle], + (instregex "VMOVv", "VMOV(S|D)$", "VMOV(S|D)cc", + "VMVNv", "VMVN(d|q)", "VMVN(S|D)cc", + "FCONST(D|S)")>; + def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VMOVN", "VMOVL")>; + def : InstRW<[WriteSequence<[SwiftWriteP0FourCycle, SwiftWriteP1TwoCycle]>], + (instregex "VQMOVN")>; + def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VDUPLN", "VDUPf")>; + def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>], + (instregex "VDUP(8|16|32)")>; + def : InstRW<[SwiftWriteP2ThreeCycle], (instregex "VMOVRS$")>; + def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP0TwoCycle]>], + (instregex "VMOVSR$", "VSETLN")>; + def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2FourCycle], + (instregex "VMOVRR(D|S)$")>; + def : InstRW<[SwiftWriteP2FourCycle], (instregex "VMOVDRR$")>; + def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>, + WriteSequence<[SwiftWrite1Cycle, SwiftWriteP2FourCycle, + SwiftWriteP1TwoCycle]>], + (instregex "VMOVSRR$")>; + def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle]>], + (instregex "VGETLN(u|i)")>; + def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle, + SwiftWriteP01OneCycle]>], + (instregex "VGETLNs")>; + + // 4.2.38 Advanced SIMD and VFP, Move FPSCR + // Serializing instructions. + def SwiftWaitP0For15Cy : SchedWriteRes<[SwiftUnitP0]> { + let Latency = 15; + let ResourceCycles = [15]; + } + def SwiftWaitP1For15Cy : SchedWriteRes<[SwiftUnitP1]> { + let Latency = 15; + let ResourceCycles = [15]; + } + def SwiftWaitP2For15Cy : SchedWriteRes<[SwiftUnitP2]> { + let Latency = 15; + let ResourceCycles = [15]; + } + def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy], + (instregex "VMRS")>; + def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy], + (instregex "VMSR")>; + // Not serializing. + def : InstRW<[SwiftWriteP0TwoCycle], (instregex "FMSTAT")>; + + // 4.2.39 Advanced SIMD and VFP, Load Single Element + def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDRD$", "VLDRS$")>; + + // 4.2.40 Advanced SIMD and VFP, Store Single Element + def : InstRW<[SwiftWriteLM4Cy], (instregex "VSTRD$", "VSTRS$")>; + + // 4.2.41 Advanced SIMD and VFP, Load Multiple + // 4.2.42 Advanced SIMD and VFP, Store Multiple + + // Resource requirement for permuting, just reserves the resources. + foreach Num = 1-28 in { + def SwiftVLDMPerm#Num : SchedWriteRes<[SwiftUnitP1]> { + let Latency = 0; + let NumMicroOps = Num; + let ResourceCycles = [Num]; + } + } + + // Pre RA pseudos - load/store to a Q register as a D register pair. + def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDMQIA$", "VSTMQIA$")>; + + // Post RA not modelled accurately. We assume that register use of width 64 + // bit maps to a D register, 128 maps to a Q register. Not all different kinds + // are accurately represented. + def SwiftWriteVLDM : SchedWriteVariant<[ + // Load of one S register. + SchedVar<SwiftLMAddr1Pred, [SwiftWriteLM4Cy]>, + // Load of one D register. + SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo]>, + // Load of 3 S register. 
+ SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM13CyNo, SwiftWriteP01OneCycle, + SwiftVLDMPerm3]>, + // Load of a Q register (not necessarily true). We should not be mapping to + // 4 S registers, either. + SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo, + SwiftWriteLM4CyNo, SwiftWriteLM4CyNo]>, + // Load of 5 S registers. + SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM13CyNo, SwiftWriteLM14CyNo, + SwiftWriteLM17CyNo, SwiftWriteP01OneCycle, + SwiftVLDMPerm5]>, + // Load of 3 D registers. (Must also be able to handle s register list - + // though, not accurate) + SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM10Cy, SwiftWriteLM14CyNo, + SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, + SwiftWriteP01OneCycle, SwiftVLDMPerm5]>, + // Load of 7 S registers. + SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM13Cy, SwiftWriteLM14CyNo, + SwiftWriteLM17CyNo, SwiftWriteLM18CyNo, + SwiftWriteLM21CyNo, SwiftWriteP01OneCycle, + SwiftVLDMPerm7]>, + // Load of two Q registers. + SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM13Cy, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteP01OneCycle, SwiftVLDMPerm2]>, + // Load of 9 S registers. + SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM13Cy, SwiftWriteLM14CyNo, + SwiftWriteLM17CyNo, SwiftWriteLM18CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM25CyNo, SwiftWriteP01OneCycle, + SwiftVLDMPerm9]>, + // Load of 5 D registers. + SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM10Cy, SwiftWriteLM14Cy, + SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, + SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, + SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, + SwiftWriteP01OneCycle, SwiftVLDMPerm5]>, + // Inaccurate: reuse the description from 9 S registers. + SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM13Cy, SwiftWriteLM14CyNo, + SwiftWriteLM17CyNo, SwiftWriteLM18CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM25CyNo, SwiftWriteP01OneCycle, + SwiftVLDMPerm9]>, + // Load of three Q registers. + SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM11Cy, SwiftWriteLM11Cy, + SwiftWriteLM11CyNo, SwiftWriteLM11CyNo, + SwiftWriteLM11CyNo, SwiftWriteLM11CyNo, + SwiftWriteLM11CyNo, SwiftWriteLM11CyNo, + SwiftWriteLM11CyNo, SwiftWriteLM11CyNo, + SwiftWriteP01OneCycle, SwiftVLDMPerm3]>, + // Inaccurate: reuse the description from 9 S registers. + SchedVar<SwiftLMAddr13Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM13Cy, SwiftWriteLM14CyNo, + SwiftWriteLM17CyNo, SwiftWriteLM18CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM25CyNo, SwiftWriteP01OneCycle, + SwiftVLDMPerm9]>, + // Load of 7 D registers inaccurate.
+ SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM10Cy, SwiftWriteLM14Cy, + SwiftWriteLM14Cy, SwiftWriteLM14CyNo, + SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, + SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, + SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, + SwiftWriteP01OneCycle, SwiftVLDMPerm7]>, + SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM13Cy, SwiftWriteLM14Cy, + SwiftWriteLM17Cy, SwiftWriteLM18CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM25CyNo, SwiftWriteP01OneCycle, + SwiftVLDMPerm9]>, + // Load of 4 Q registers. + SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM7Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy, SwiftWriteLM14Cy, + SwiftWriteLM15Cy, SwiftWriteLM18CyNo, + SwiftWriteLM19CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM19CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM19CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM19CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM19CyNo, SwiftWriteLM22CyNo, + SwiftWriteP01OneCycle, SwiftVLDMPerm4]>, + // Unknown number of registers, just use resources for two registers. + SchedVar<NoSchedPred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM13Cy, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteP01OneCycle, SwiftVLDMPerm2]> + ]> { let Variadic = 1; } + + def : InstRW<[SwiftWriteVLDM], (instregex "VLDM[SD](IA|DB)$")>; + + def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVLDM], + (instregex "VLDM[SD](IA|DB)_UPD$")>; + + def SwiftWriteVSTM : SchedWriteVariant<[ + // One S register. + SchedVar<SwiftLMAddr1Pred, [SwiftWriteSTM1]>, + // One D register. + SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM1]>, + // Three S registers. + SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM4]>, + // Assume one Q register. + SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM1]>, + SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM6]>, + // Assume three D registers. + SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM4]>, + SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM8]>, + // Assume two Q registers. + SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM3]>, + SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM10]>, + // Assume 5 D registers. + SchedVar<SwiftLMAddr10Pred, [SwiftWriteSTM6]>, + SchedVar<SwiftLMAddr11Pred, [SwiftWriteSTM12]>, + // Assume three Q registers. + SchedVar<SwiftLMAddr12Pred, [SwiftWriteSTM4]>, + SchedVar<SwiftLMAddr13Pred, [SwiftWriteSTM14]>, + // Assume 7 D registers. + SchedVar<SwiftLMAddr14Pred, [SwiftWriteSTM8]>, + SchedVar<SwiftLMAddr15Pred, [SwiftWriteSTM16]>, + // Assume four Q registers. + SchedVar<SwiftLMAddr16Pred, [SwiftWriteSTM5]>, + // Assume two Q registers.
+ SchedVar<NoSchedPred, [SwiftWriteSTM3]> + ]> { let Variadic = 1; } + + def : InstRW<[SwiftWriteVSTM], (instregex "VSTM[SD](IA|DB)$")>; + + def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVSTM], + (instregex "VSTM[SD](IA|DB)_UPD")>; + + // 4.2.43 Advanced SIMD, Element or Structure Load and Store + def SwiftWrite2xP2FourCy : SchedWriteRes<[SwiftUnitP2]> { + let Latency = 4; + let ResourceCycles = [2]; + } + def SwiftWrite3xP2FourCy : SchedWriteRes<[SwiftUnitP2]> { + let Latency = 4; + let ResourceCycles = [3]; + } + foreach Num = 1-2 in { + def SwiftExt#Num#xP0 : SchedWriteRes<[SwiftUnitP0]> { + let Latency = 0; + let NumMicroOps = Num; + let ResourceCycles = [Num]; + } + } + // VLDx + // Multiple structures. + // Single element structure loads. + // We assume aligned. + // Single/two register. + def : InstRW<[SwiftWriteLM4Cy], (instregex "VLD1(d|q)(8|16|32|64)$")>; + def : InstRW<[SwiftWriteLM4Cy, SwiftWriteP01OneCycle], + (instregex "VLD1(d|q)(8|16|32|64)wb")>; + // Three register. + def : InstRW<[SwiftWrite3xP2FourCy], + (instregex "VLD1(d|q)(8|16|32|64)T$", "VLD1d64TPseudo")>; + def : InstRW<[SwiftWrite3xP2FourCy, SwiftWriteP01OneCycle], + (instregex "VLD1(d|q)(8|16|32|64)Twb")>; + /// Four Register. + def : InstRW<[SwiftWrite2xP2FourCy], + (instregex "VLD1(d|q)(8|16|32|64)Q$", "VLD1d64QPseudo")>; + def : InstRW<[SwiftWrite2xP2FourCy, SwiftWriteP01OneCycle], + (instregex "VLD1(d|q)(8|16|32|64)Qwb")>; + // Two element structure loads. + // Two/four register. + def : InstRW<[SwiftWriteLM9Cy, SwiftExt2xP0, SwiftVLDMPerm2], + (instregex "VLD2(d|q|b)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftExt2xP0, + SwiftVLDMPerm2], + (instregex "VLD2(d|q|b)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>; + // Three element structure. + def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo, + SwiftVLDMPerm3, SwiftWrite3xP2FourCy], + (instregex "VLD3(d|q)(8|16|32)$")>; + def : InstRW<[SwiftWriteLM9Cy, SwiftVLDMPerm3, SwiftWrite3xP2FourCy], + (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>; + + def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo, + SwiftWriteP01OneCycle, SwiftVLDMPerm3, SwiftWrite3xP2FourCy], + (instregex "VLD3(d|q)(8|16|32)_UPD$")>; + def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm3, + SwiftWrite3xP2FourCy], + (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>; + // Four element structure loads. + def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy, + SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4, + SwiftWrite3xP2FourCy], + (instregex "VLD4(d|q)(8|16|32)$")>; + def : InstRW<[SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4, + SwiftWrite3xP2FourCy], + (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>; + def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy, + SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0, + SwiftVLDMPerm4, SwiftWrite3xP2FourCy], + (instregex "VLD4(d|q)(8|16|32)_UPD")>; + def : InstRW<[SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0, + SwiftVLDMPerm4, SwiftWrite3xP2FourCy], + (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>; + + // Single all/lane loads. + // One element structure. + def : InstRW<[SwiftWriteLM6Cy, SwiftVLDMPerm2], + (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm2], + (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)", + "VLD1LNq(8|16|32)Pseudo_UPD")>; + // Two element structure. 
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftExt1xP0, SwiftVLDMPerm2], + (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$", + "VLD2LN(d|q)(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftWriteP01OneCycle, + SwiftExt1xP0, SwiftVLDMPerm2], + (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>; + def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy, + SwiftExt1xP0, SwiftVLDMPerm2], + (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb")>; + def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy, + SwiftExt1xP0, SwiftVLDMPerm2], + (instregex "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>; + // Three element structure. + def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy, SwiftExt1xP0, + SwiftVLDMPerm3], + (instregex "VLD3(DUP|LN)(d|q)(8|16|32)$", + "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy, + SwiftWriteP01OneCycle, SwiftExt1xP0, SwiftVLDMPerm3], + (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>; + def : InstRW<[SwiftWriteLM7Cy, SwiftWriteP01OneCycle, SwiftWriteLM8Cy, + SwiftWriteLM8Cy, SwiftExt1xP0, SwiftVLDMPerm3], + (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>; + // Four element structure. + def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo, + SwiftWriteLM10CyNo, SwiftExt1xP0, SwiftVLDMPerm5], + (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$", + "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo, + SwiftWriteLM10CyNo, SwiftWriteP01OneCycle, SwiftExt1xP0, + SwiftVLDMPerm5], + (instregex "VLD4(DUP|LN)(d|q)(8|16|32)_UPD")>; + def : InstRW<[SwiftWriteLM8Cy, SwiftWriteP01OneCycle, SwiftWriteLM9Cy, + SwiftWriteLM10CyNo, SwiftWriteLM10CyNo, SwiftExt1xP0, + SwiftVLDMPerm5], + (instregex "VLD4(DUP|LN)(d|q)(8|16|32)Pseudo_UPD")>; + // VSTx + // Multiple structures. + // Single element structure store. + def : InstRW<[SwiftWrite1xP2], (instregex "VST1d(8|16|32|64)$")>; + def : InstRW<[SwiftWrite2xP2], (instregex "VST1q(8|16|32|64)$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2], + (instregex "VST1d(8|16|32|64)wb")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2], + (instregex "VST1q(8|16|32|64)wb")>; + def : InstRW<[SwiftWrite3xP2], + (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite3xP2], + (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>; + def : InstRW<[SwiftWrite4xP2], + (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2], + (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>; + // Two element structure store. + def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1], + (instregex "VST2(d|b)(8|16|32)$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1], + (instregex "VST2(b|d)(8|16|32)wb")>; + def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2], + (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2], + (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>; + // Three element structure store. + def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2], + (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2], + (instregex "VST3(d|q)(8|16|32)_UPD", + "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>; + // Four element structure store.
+ def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2], + (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm4], + (instregex "VST4(d|q)(8|16|32)_UPD", + "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>; + // Single/all lane store. + // One element structure. + def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1], + (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1], + (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>; + // Two element structure. + def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm2], + (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm2], + (instregex "VST2LN(d|q)(8|16|32)_UPD", + "VST2LN(d|q)(8|16|32)Pseudo_UPD")>; + // Three element structure. + def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2], + (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2], + (instregex "VST3LN(d|q)(8|16|32)_UPD", + "VST3LN(d|q)(8|16|32)Pseudo_UPD")>; + // Four element structure. + def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2], + (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2], + (instregex "VST4LN(d|q)(8|16|32)_UPD", + "VST4LN(d|q)(8|16|32)Pseudo_UPD")>; + + // 4.2.44 VFP, Divide and Square Root + def SwiftDiv17 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> { + let NumMicroOps = 1; + let Latency = 17; + let ResourceCycles = [1, 15]; + } + def SwiftDiv32 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> { + let NumMicroOps = 1; + let Latency = 32; + let ResourceCycles = [1, 30]; + } + def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>; + def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>; + + // Not specified. + def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>; + // Preload. 
+ def : WriteRes<WritePreLd, [SwiftUnitP2]> { let Latency = 0; + let ResourceCycles = [0]; + } + } diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 41a7e0c2c8..93add6ee33 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -26,7 +26,7 @@ ARMSelectionDAGInfo::~ARMSelectionDAGInfo() { } SDValue -ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, +ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, @@ -140,7 +140,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, // GNU library uses (ptr, value, size) // See RTABI section 4.3.4 SDValue ARMSelectionDAGInfo:: -EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, +EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h index 6419a73729..56c9375855 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -45,7 +45,7 @@ public: ~ARMSelectionDAGInfo(); virtual - SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, @@ -55,7 +55,7 @@ public: // Adjust parameters for memset, see RTABI section 4.3.4 virtual - SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, unsigned Align, diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 3b8e56fda4..4d204ceafc 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -38,9 +38,24 @@ static cl::opt<bool> UseFusedMulOps("arm-use-mulops", cl::init(true), cl::Hidden); -static cl::opt<bool> -StrictAlign("arm-strict-align", cl::Hidden, - cl::desc("Disallow all unaligned memory accesses")); +enum AlignMode { + DefaultAlign, + StrictAlign, + NoStrictAlign +}; + +static cl::opt<AlignMode> +Align(cl::desc("Load/store alignment support"), + cl::Hidden, cl::init(DefaultAlign), + cl::values( + clEnumValN(DefaultAlign, "arm-default-align", + "Generate unaligned accesses only on hardware/OS " + "combinations that are known to support them"), + clEnumValN(StrictAlign, "arm-strict-align", + "Disallow all unaligned memory accesses"), + clEnumValN(NoStrictAlign, "arm-no-strict-align", + "Allow unaligned memory accesses"), + clEnumValEnd)); ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, const TargetOptions &Options) @@ -91,6 +106,7 @@ void ARMSubtarget::initializeEnvironment() { HasRAS = false; HasMPExtension = false; FPOnlySP = false; + HasPerfMon = false; HasTrustZone = false; AllowsUnalignedMem = false; Thumb2DSP = false; @@ -162,10 +178,32 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { if (!isThumb() || hasThumb2()) PostRAScheduler = true; - // v6+ may or may not support unaligned mem access depending on the system - // configuration. - if (!StrictAlign && hasV6Ops() && isTargetDarwin()) - AllowsUnalignedMem = true; + switch (Align) { + case DefaultAlign: + // Assume pre-ARMv6 doesn't support unaligned accesses. 
+ // + // ARMv6 may or may not support unaligned accesses depending on the + // SCTLR.U bit, which is architecture-specific. We assume ARMv6 + // Darwin targets support unaligned accesses, and others don't. + // + // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit + // which raises an alignment fault on unaligned accesses. Linux + // defaults this bit to 0 and handles it as a system-wide (not + // per-process) setting. It is therefore safe to assume that ARMv7+ + // Linux targets support unaligned accesses. The same goes for NaCl. + // + // The above behavior is consistent with GCC. + AllowsUnalignedMem = ( + (hasV7Ops() && (isTargetLinux() || isTargetNaCl())) || + (hasV6Ops() && isTargetDarwin())); + break; + case StrictAlign: + AllowsUnalignedMem = false; + break; + case NoStrictAlign: + AllowsUnalignedMem = true; + break; + } // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default. uint64_t Bits = getFeatureBits(); diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 038eb76ae1..bc5af96c60 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -148,6 +148,11 @@ protected: /// precision. bool FPOnlySP; + /// If true, the processor supports the Performance Monitor Extensions. These + /// include a generic cycle-counter as well as more fine-grained (often + /// implementation-specific) events. + bool HasPerfMon; + /// HasTrustZone - if true, processor supports TrustZone security extensions bool HasTrustZone; @@ -254,6 +259,7 @@ public: bool hasVMLxForwarding() const { return HasVMLxForwarding; } bool isFPBrccSlow() const { return SlowFPBrcc; } bool isFPOnlySP() const { return FPOnlySP; } + bool hasPerfMon() const { return HasPerfMon; } bool hasTrustZone() const { return HasTrustZone; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } @@ -270,9 +276,8 @@ public: bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } - bool isTargetNaCl() const { - return TargetTriple.getOS() == Triple::NaCl; - } + bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NaCl; } + bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } bool isTargetELF() const { return !isTargetDarwin(); } bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 42c7d2c437..17c52c94a0 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -85,6 +85,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { + initAsmInfo(); if (!Subtarget.hasARMOps()) report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " "support ARM mode execution!"); @@ -117,6 +118,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget.hasThumb2() ? 
new ARMFrameLowering(Subtarget) : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { + initAsmInfo(); } namespace { diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 114cc9e5c0..c59ca64c11 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -49,6 +49,20 @@ class ARMAsmParser : public MCTargetAsmParser { MCAsmParser &Parser; const MCRegisterInfo *MRI; + // Unwind directives state + SMLoc FnStartLoc; + SMLoc CantUnwindLoc; + SMLoc PersonalityLoc; + SMLoc HandlerDataLoc; + int FPReg; + void resetUnwindDirectiveParserState() { + FnStartLoc = SMLoc(); + CantUnwindLoc = SMLoc(); + PersonalityLoc = SMLoc(); + HandlerDataLoc = SMLoc(); + FPReg = -1; + } + // Map of register aliases registered via the .req directive. StringMap<unsigned> RegisterReqs; @@ -76,7 +90,7 @@ class ARMAsmParser : public MCTargetAsmParser { if (!inITBlock()) return; // Move to the next instruction in the IT block, if there is one. If not, // mark the block as done. - unsigned TZ = CountTrailingZeros_32(ITState.Mask); + unsigned TZ = countTrailingZeros(ITState.Mask); if (++ITState.CurPosition == 5 - TZ) ITState.CurPosition = ~0U; // Done with the IT block after this. } @@ -86,11 +100,11 @@ class ARMAsmParser : public MCTargetAsmParser { MCAsmLexer &getLexer() const { return Parser.getLexer(); } bool Warning(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + ArrayRef<SMRange> Ranges = None) { return Parser.Warning(L, Msg, Ranges); } bool Error(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + ArrayRef<SMRange> Ranges = None) { return Parser.Error(L, Msg, Ranges); } @@ -113,6 +127,14 @@ class ARMAsmParser : public MCTargetAsmParser { bool parseDirectiveUnreq(SMLoc L); bool parseDirectiveArch(SMLoc L); bool parseDirectiveEabiAttr(SMLoc L); + bool parseDirectiveFnStart(SMLoc L); + bool parseDirectiveFnEnd(SMLoc L); + bool parseDirectiveCantUnwind(SMLoc L); + bool parseDirectivePersonality(SMLoc L); + bool parseDirectiveHandlerData(SMLoc L); + bool parseDirectiveSetFP(SMLoc L); + bool parseDirectivePad(SMLoc L); + bool parseDirectiveRegSave(SMLoc L, bool IsVector); StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode, bool &CarrySetting, unsigned &ProcessorIMod, @@ -130,12 +152,19 @@ class ARMAsmParser : public MCTargetAsmParser { bool isThumbTwo() const { return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2); } + bool hasThumb() const { + return STI.getFeatureBits() & ARM::HasV4TOps; + } bool hasV6Ops() const { return STI.getFeatureBits() & ARM::HasV6Ops; } bool hasV7Ops() const { return STI.getFeatureBits() & ARM::HasV7Ops; } + bool hasARM() const { + return !(STI.getFeatureBits() & ARM::FeatureNoARM); + } + void SwitchMode() { unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); setAvailableFeatures(FB); @@ -161,6 +190,8 @@ class ARMAsmParser : public MCTargetAsmParser { SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseMemBarrierOptOperand( SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseInstSyncBarrierOptOperand( + SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseProcIFlagsOperand( SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseMSRMaskOperand( @@ -242,7 +273,7 @@ public: }; ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { + : MCTargetAsmParser(), STI(_STI),
Parser(_Parser), FPReg(-1) { MCAsmParserExtension::Initialize(_Parser); // Cache the MCRegisterInfo. @@ -293,6 +324,7 @@ class ARMOperand : public MCParsedAsmOperand { k_CoprocOption, k_Immediate, k_MemBarrierOpt, + k_InstSyncBarrierOpt, k_Memory, k_PostIndexRegister, k_MSRMask, @@ -336,6 +368,10 @@ class ARMOperand : public MCParsedAsmOperand { ARM_MB::MemBOpt Val; }; + struct ISBOptOp { + ARM_ISB::InstSyncBOpt Val; + }; + struct IFlagsOp { ARM_PROC::IFlags Val; }; @@ -422,6 +458,7 @@ class ARMOperand : public MCParsedAsmOperand { struct CopOp Cop; struct CoprocOptionOp CoprocOption; struct MBOptOp MBOpt; + struct ISBOptOp ISBOpt; struct ITMaskOp ITMask; struct IFlagsOp IFlags; struct MMaskOp MMask; @@ -482,6 +519,8 @@ public: case k_MemBarrierOpt: MBOpt = o.MBOpt; break; + case k_InstSyncBarrierOpt: + ISBOpt = o.ISBOpt; break; case k_Memory: Memory = o.Memory; break; @@ -564,6 +603,11 @@ public: return MBOpt.Val; } + ARM_ISB::InstSyncBOpt getInstSyncBarrierOpt() const { + assert(Kind == k_InstSyncBarrierOpt && "Invalid access!"); + return ISBOpt.Val; + } + ARM_PROC::IFlags getProcIFlags() const { assert(Kind == k_ProcIFlags && "Invalid access!"); return IFlags.Val; @@ -903,6 +947,7 @@ public: bool isSPRRegList() const { return Kind == k_SPRRegisterList; } bool isToken() const { return Kind == k_Token; } bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; } + bool isInstSyncBarrierOpt() const { return Kind == k_InstSyncBarrierOpt; } bool isMem() const { return Kind == k_Memory; } bool isShifterImm() const { return Kind == k_ShifterImmediate; } bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; } @@ -949,7 +994,7 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Val = CE->getValue(); - return Val > -4096 && Val < 4096; + return (Val == INT32_MIN) || (Val > -4096 && Val < 4096); } bool isAddrMode3() const { // If we have an immediate that's not a constant, treat it as a label @@ -1680,6 +1725,11 @@ public: Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); } + void addInstSyncBarrierOptOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(unsigned(getInstSyncBarrierOpt()))); + } + void addMemNoOffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); @@ -2345,6 +2395,15 @@ public: return Op; } + static ARMOperand *CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt, + SMLoc S) { + ARMOperand *Op = new ARMOperand(k_InstSyncBarrierOpt); + Op->ISBOpt.Val = Opt; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) { ARMOperand *Op = new ARMOperand(k_ProcIFlags); Op->IFlags.Val = IFlags; @@ -2399,6 +2458,9 @@ void ARMOperand::print(raw_ostream &OS) const { case k_MemBarrierOpt: OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">"; break; + case k_InstSyncBarrierOpt: + OS << "<ARM_ISB::" << InstSyncBOptToString(getInstSyncBarrierOpt()) << ">"; + break; case k_Memory: OS << "<memory " << " base:" << Memory.BaseRegNum; @@ -3036,7 +3098,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) { // There's an optional '#' token here. Normally there wouldn't be, but // inline assembler puts one in, and it's friendly to accept that. if (Parser.getTok().is(AsmToken::Hash)) - Parser.Lex(); // Eat the '#' + Parser.Lex(); // Eat '#' or '$'.
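// (A note on the recurring "Eat '#' or '$'" change throughout these hunks:
// GNU as accepts either prefix, or none at all, before an ARM immediate.
// A minimal sketch of the idiom, with eatOptionalImmPrefix as an assumed
// helper name that is not part of this patch:)
static void eatOptionalImmPrefix(MCAsmParser &Parser) {
  // Consume an optional '#' or '$' prefix before an immediate operand.
  if (Parser.getTok().is(AsmToken::Hash) ||
      Parser.getTok().is(AsmToken::Dollar))
    Parser.Lex(); // Eat '#' or '$'.
}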
const MCExpr *LaneIndex; SMLoc Loc = Parser.getTok().getLoc(); @@ -3354,7 +3416,7 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Tok.is(AsmToken::Dollar) || Tok.is(AsmToken::Integer)) { if (Parser.getTok().isNot(AsmToken::Integer)) - Parser.Lex(); // Eat the '#'. + Parser.Lex(); // Eat '#' or '$'. SMLoc Loc = Parser.getTok().getLoc(); const MCExpr *MemBarrierID; @@ -3383,6 +3445,57 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } +/// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options. +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseInstSyncBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + const AsmToken &Tok = Parser.getTok(); + unsigned Opt; + + if (Tok.is(AsmToken::Identifier)) { + StringRef OptStr = Tok.getString(); + + if (OptStr.lower() == "sy") + Opt = ARM_ISB::SY; + else + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat identifier token. + } else if (Tok.is(AsmToken::Hash) || + Tok.is(AsmToken::Dollar) || + Tok.is(AsmToken::Integer)) { + if (Parser.getTok().isNot(AsmToken::Integer)) + Parser.Lex(); // Eat '#' or '$'. + SMLoc Loc = Parser.getTok().getLoc(); + + const MCExpr *ISBarrierID; + if (getParser().parseExpression(ISBarrierID)) { + Error(Loc, "illegal expression"); + return MatchOperand_ParseFail; + } + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ISBarrierID); + if (!CE) { + Error(Loc, "constant expression expected"); + return MatchOperand_ParseFail; + } + + int Val = CE->getValue(); + if (Val & ~0xf) { + Error(Loc, "immediate value out of range"); + return MatchOperand_ParseFail; + } + + Opt = ARM_ISB::RESERVED_0 + Val; + } else + return MatchOperand_ParseFail; + + Operands.push_back(ARMOperand::CreateInstSyncBarrierOpt( + (ARM_ISB::InstSyncBOpt)Opt, S)); + return MatchOperand_Success; +} + + /// parseProcIFlagsOperand - Try to parse iflags from CPS instruction. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { @@ -3602,7 +3715,7 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Error(S, "'be' or 'le' operand expected"); return MatchOperand_ParseFail; } - int Val = StringSwitch<int>(Tok.getString()) + int Val = StringSwitch<int>(Tok.getString().lower()) .Case("be", 1) .Case("le", 0) .Default(-1); @@ -3875,7 +3988,7 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Do immediates first, as we always parse those if we have a '#'. if (Parser.getTok().is(AsmToken::Hash) || Parser.getTok().is(AsmToken::Dollar)) { - Parser.Lex(); // Eat the '#'. + Parser.Lex(); // Eat '#' or '$'. // Explicitly look for a '-', as we need to encode negative zero // differently. bool isNegative = Parser.getTok().is(AsmToken::Minus); @@ -4354,7 +4467,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.getTok().is(AsmToken::Dollar) || Parser.getTok().is(AsmToken::Integer)) { if (Parser.getTok().isNot(AsmToken::Integer)) - Parser.Lex(); // Eat the '#'. + Parser.Lex(); // Eat '#' or '$'. E = Parser.getTok().getLoc(); bool isNegative = getParser().getTok().is(AsmToken::Minus); @@ -4536,7 +4649,7 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { TyOp->getToken() != ".f64")) return MatchOperand_NoMatch; - Parser.Lex(); // Eat the '#'. + Parser.Lex(); // Eat '#' or '$'. // Handle negation, as that still comes through as a separate token. 
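// (Stepping back to parseInstSyncBarrierOptOperand above: both spellings of
// the ISB option round-trip once the printer below is in place. Using only
// values visible in this patch:
//   isb sy    -> ARM_ISB::SY              -> printed as "sy"
//   isb #0x4  -> ARM_ISB::RESERVED_0 + 4  -> printed as "#0x4"
// Anything with bits above 0xf is rejected both here, with "immediate value
// out of range", and in the disassembler's DecodeInstSyncBarrierOption.)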
bool isNegative = false; @@ -7398,11 +7511,10 @@ processInstruction(MCInst &Inst, MCOperand &MO = Inst.getOperand(1); unsigned Mask = MO.getImm(); unsigned OrigMask = Mask; - unsigned TZ = CountTrailingZeros_32(Mask); + unsigned TZ = countTrailingZeros(Mask); if ((Inst.getOperand(0).getImm() & 1) == 0) { assert(Mask && TZ <= 3 && "illegal IT mask value!"); - for (unsigned i = 3; i != TZ; --i) - Mask ^= 1 << i; + Mask ^= (0xE << TZ) & 0xF; } MO.setImm(Mask); @@ -7658,6 +7770,24 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveArch(DirectiveID.getLoc()); else if (IDVal == ".eabi_attribute") return parseDirectiveEabiAttr(DirectiveID.getLoc()); + else if (IDVal == ".fnstart") + return parseDirectiveFnStart(DirectiveID.getLoc()); + else if (IDVal == ".fnend") + return parseDirectiveFnEnd(DirectiveID.getLoc()); + else if (IDVal == ".cantunwind") + return parseDirectiveCantUnwind(DirectiveID.getLoc()); + else if (IDVal == ".personality") + return parseDirectivePersonality(DirectiveID.getLoc()); + else if (IDVal == ".handlerdata") + return parseDirectiveHandlerData(DirectiveID.getLoc()); + else if (IDVal == ".setfp") + return parseDirectiveSetFP(DirectiveID.getLoc()); + else if (IDVal == ".pad") + return parseDirectivePad(DirectiveID.getLoc()); + else if (IDVal == ".save") + return parseDirectiveRegSave(DirectiveID.getLoc(), false); + else if (IDVal == ".vsave") + return parseDirectiveRegSave(DirectiveID.getLoc(), true); return true; } @@ -7693,6 +7823,9 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { return Error(L, "unexpected token in directive"); Parser.Lex(); + if (!hasThumb()) + return Error(L, "target does not support Thumb mode"); + if (!isThumb()) SwitchMode(); getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); @@ -7706,6 +7839,9 @@ bool ARMAsmParser::parseDirectiveARM(SMLoc L) { return Error(L, "unexpected token in directive"); Parser.Lex(); + if (!hasARM()) + return Error(L, "target does not support ARM mode"); + if (isThumb()) SwitchMode(); getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); @@ -7795,10 +7931,16 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) { Parser.Lex(); if (Val == 16) { + if (!hasThumb()) + return Error(L, "target does not support Thumb mode"); + if (!isThumb()) SwitchMode(); getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); } else { + if (!hasARM()) + return Error(L, "target does not support ARM mode"); + if (isThumb()) SwitchMode(); getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); @@ -7858,6 +8000,219 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { return true; } +/// parseDirectiveFnStart +/// ::= .fnstart +bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) { + if (FnStartLoc.isValid()) { + Error(L, ".fnstart starts before the end of previous one"); + Error(FnStartLoc, "previous .fnstart starts here"); + return true; + } + + FnStartLoc = L; + getParser().getStreamer().EmitFnStart(); + return false; +} + +/// parseDirectiveFnEnd +/// ::= .fnend +bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) { + // Check the ordering of unwind directives + if (!FnStartLoc.isValid()) + return Error(L, ".fnstart must precede .fnend directive"); + + // Reset the unwind directives parser state + resetUnwindDirectiveParserState(); + + getParser().getStreamer().EmitFnEnd(); + return false; +} + +/// parseDirectiveCantUnwind +/// ::= .cantunwind +bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) { + // Check the ordering of unwind directives + CantUnwindLoc = L; + if (!FnStartLoc.isValid()) + return 
Error(L, ".fnstart must precede .cantunwind directive"); + if (HandlerDataLoc.isValid()) { + Error(L, ".cantunwind can't be used with .handlerdata directive"); + Error(HandlerDataLoc, ".handlerdata was specified here"); + return true; + } + if (PersonalityLoc.isValid()) { + Error(L, ".cantunwind can't be used with .personality directive"); + Error(PersonalityLoc, ".personality was specified here"); + return true; + } + + getParser().getStreamer().EmitCantUnwind(); + return false; +} + +/// parseDirectivePersonality +/// ::= .personality name +bool ARMAsmParser::parseDirectivePersonality(SMLoc L) { + // Check the ordering of unwind directives + PersonalityLoc = L; + if (!FnStartLoc.isValid()) + return Error(L, ".fnstart must precede .personality directive"); + if (CantUnwindLoc.isValid()) { + Error(L, ".personality can't be used with .cantunwind directive"); + Error(CantUnwindLoc, ".cantunwind was specified here"); + return true; + } + if (HandlerDataLoc.isValid()) { + Error(L, ".personality must precede .handlerdata directive"); + Error(HandlerDataLoc, ".handlerdata was specified here"); + return true; + } + + // Parse the name of the personality routine + if (Parser.getTok().isNot(AsmToken::Identifier)) { + Parser.eatToEndOfStatement(); + return Error(L, "unexpected input in .personality directive."); + } + StringRef Name(Parser.getTok().getIdentifier()); + Parser.Lex(); + + MCSymbol *PR = getParser().getContext().GetOrCreateSymbol(Name); + getParser().getStreamer().EmitPersonality(PR); + return false; +} + +/// parseDirectiveHandlerData +/// ::= .handlerdata +bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) { + // Check the ordering of unwind directives + HandlerDataLoc = L; + if (!FnStartLoc.isValid()) + return Error(L, ".fnstart must precede .personality directive"); + if (CantUnwindLoc.isValid()) { + Error(L, ".handlerdata can't be used with .cantunwind directive"); + Error(CantUnwindLoc, ".cantunwind was specified here"); + return true; + } + + getParser().getStreamer().EmitHandlerData(); + return false; +} + +/// parseDirectiveSetFP +/// ::= .setfp fpreg, spreg [, offset] +bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { + // Check the ordering of unwind directives + if (!FnStartLoc.isValid()) + return Error(L, ".fnstart must precede .setfp directive"); + if (HandlerDataLoc.isValid()) + return Error(L, ".setfp must precede .handlerdata directive"); + + // Parse fpreg + SMLoc NewFPRegLoc = Parser.getTok().getLoc(); + int NewFPReg = tryParseRegister(); + if (NewFPReg == -1) + return Error(NewFPRegLoc, "frame pointer register expected"); + + // Consume comma + if (!Parser.getTok().is(AsmToken::Comma)) + return Error(Parser.getTok().getLoc(), "comma expected"); + Parser.Lex(); // skip comma + + // Parse spreg + SMLoc NewSPRegLoc = Parser.getTok().getLoc(); + int NewSPReg = tryParseRegister(); + if (NewSPReg == -1) + return Error(NewSPRegLoc, "stack pointer register expected"); + + if (NewSPReg != ARM::SP && NewSPReg != FPReg) + return Error(NewSPRegLoc, + "register should be either $sp or the latest fp register"); + + // Update the frame pointer register + FPReg = NewFPReg; + + // Parse offset + int64_t Offset = 0; + if (Parser.getTok().is(AsmToken::Comma)) { + Parser.Lex(); // skip comma + + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { + return Error(Parser.getTok().getLoc(), "'#' expected"); + } + Parser.Lex(); // skip hash token. 
+ + const MCExpr *OffsetExpr; + SMLoc ExLoc = Parser.getTok().getLoc(); + SMLoc EndLoc; + if (getParser().parseExpression(OffsetExpr, EndLoc)) + return Error(ExLoc, "malformed setfp offset"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr); + if (!CE) + return Error(ExLoc, "setfp offset must be an immediate"); + + Offset = CE->getValue(); + } + + getParser().getStreamer().EmitSetFP(static_cast<unsigned>(NewFPReg), + static_cast<unsigned>(NewSPReg), + Offset); + return false; +} + +/// parseDirectivePad +/// ::= .pad offset +bool ARMAsmParser::parseDirectivePad(SMLoc L) { + // Check the ordering of unwind directives + if (!FnStartLoc.isValid()) + return Error(L, ".fnstart must precede .pad directive"); + if (HandlerDataLoc.isValid()) + return Error(L, ".pad must precede .handlerdata directive"); + + // Parse the offset + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { + return Error(Parser.getTok().getLoc(), "'#' expected"); + } + Parser.Lex(); // skip hash/dollar token. + + const MCExpr *OffsetExpr; + SMLoc ExLoc = Parser.getTok().getLoc(); + SMLoc EndLoc; + if (getParser().parseExpression(OffsetExpr, EndLoc)) + return Error(ExLoc, "malformed pad offset"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr); + if (!CE) + return Error(ExLoc, "pad offset must be an immediate"); + + getParser().getStreamer().EmitPad(CE->getValue()); + return false; +} + +/// parseDirectiveRegSave +/// ::= .save { registers } +/// ::= .vsave { registers } +bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) { + // Check the ordering of unwind directives + if (!FnStartLoc.isValid()) + return Error(L, ".fnstart must precede .save or .vsave directives"); + if (HandlerDataLoc.isValid()) + return Error(L, ".save or .vsave must precede .handlerdata directive"); + + // Parse the register list + SmallVector<MCParsedAsmOperand*, 1> Operands; + if (parseRegisterList(Operands)) + return true; + ARMOperand *Op = (ARMOperand*)Operands[0]; + if (!IsVector && !Op->isRegList()) + return Error(L, ".save expects GPR registers"); + if (IsVector && !Op->isDPRRegList()) + return Error(L, ".vsave expects DPR registers"); + + getParser().getStreamer().EmitRegSave(Op->getRegList(), IsVector); + return false; +} + /// Force static initialization. extern "C" void LLVMInitializeARMAsmParser() { RegisterMCAsmParser<ARMAsmParser> X(TheARMTarget); diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index ac937f3534..a6eab33af3 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -65,7 +65,7 @@ namespace { void setITState(char Firstcond, char Mask) { // (3 - the number of trailing zeros) is the number of then / else.
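// A short worked sketch of what the trailing-zero count buys (not part of
// the patch): the lowest set bit of Mask is a terminator, so
//   Mask = 0b1000 -> NumTZ = 3 -> "IT"    (1 instruction in the block)
//   Mask = 0b0100 -> NumTZ = 2 -> "ITx"   (2 instructions)
//   Mask = 0b0001 -> NumTZ = 0 -> "ITxyz" (4 instructions)
// The same arithmetic explains the asm parser's new
//   Mask ^= (0xE << TZ) & 0xF;
// which flips exactly the mask bits above the terminator (bits TZ+1..3),
// matching the explicit loop it replaces.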
unsigned CondBit0 = Firstcond & 1; - unsigned NumTZ = CountTrailingZeros_32(Mask); + unsigned NumTZ = countTrailingZeros<uint8_t>(Mask); unsigned char CCBits = static_cast<unsigned char>(Firstcond & 0xf); assert(NumTZ <= 3 && "Invalid IT mask!"); // push condition codes onto the stack in the correct order for the pops @@ -156,12 +156,17 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, @@ -236,6 +241,14 @@ static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val, @@ -268,6 +281,8 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, @@ -348,6 +363,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, @@ -402,7 +419,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, "Asked to disassemble an ARM instruction but Subtarget is in Thumb mode!"); // We want to read exactly 4 bytes of data.
- if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) { + if (Region.readBytes(Address, 4, bytes) == -1) { Size = 0; return MCDisassembler::Fail; } @@ -492,102 +509,9 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, bool isBranch, uint64_t InstSize, MCInst &MI, const void *Decoder) { const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); - LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback(); - struct LLVMOpInfo1 SymbolicOp; - memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); - SymbolicOp.Value = Value; - void *DisInfo = Dis->getDisInfoBlock(); - - if (!getOpInfo || - !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { - // Clear SymbolicOp.Value from above and also all other fields. - memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); - LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); - if (!SymbolLookUp) - return false; - uint64_t ReferenceType; - if (isBranch) - ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; - else - ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; - const char *ReferenceName; - uint64_t SymbolValue = 0x00000000ffffffffULL & Value; - const char *Name = SymbolLookUp(DisInfo, SymbolValue, &ReferenceType, - Address, &ReferenceName); - if (Name) { - SymbolicOp.AddSymbol.Name = Name; - SymbolicOp.AddSymbol.Present = true; - } - // For branches always create an MCExpr so it gets printed as hex address. - else if (isBranch) { - SymbolicOp.Value = Value; - } - if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) - (*Dis->CommentStream) << "symbol stub for: " << ReferenceName; - if (!Name && !isBranch) - return false; - } - - MCContext *Ctx = Dis->getMCContext(); - const MCExpr *Add = NULL; - if (SymbolicOp.AddSymbol.Present) { - if (SymbolicOp.AddSymbol.Name) { - StringRef Name(SymbolicOp.AddSymbol.Name); - MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); - Add = MCSymbolRefExpr::Create(Sym, *Ctx); - } else { - Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx); - } - } - - const MCExpr *Sub = NULL; - if (SymbolicOp.SubtractSymbol.Present) { - if (SymbolicOp.SubtractSymbol.Name) { - StringRef Name(SymbolicOp.SubtractSymbol.Name); - MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); - Sub = MCSymbolRefExpr::Create(Sym, *Ctx); - } else { - Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx); - } - } - - const MCExpr *Off = NULL; - if (SymbolicOp.Value != 0) - Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); - - const MCExpr *Expr; - if (Sub) { - const MCExpr *LHS; - if (Add) - LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); - else - LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); - if (Off != 0) - Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); - else - Expr = LHS; - } else if (Add) { - if (Off != 0) - Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); - else - Expr = Add; - } else { - if (Off != 0) - Expr = Off; - else - Expr = MCConstantExpr::Create(0, *Ctx); - } - - if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16) - MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx))); - else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16) - MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx))); - else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None) - MI.addOperand(MCOperand::CreateExpr(Expr)); - else - llvm_unreachable("bad SymbolicOp.VariantKind"); - - return true; + // FIXME: Does it make sense for value to be negative? 
+ return Dis->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address, isBranch, + /* Offset */ 0, InstSize); } /// tryAddingPcLoadReferenceComment - tries to add a comment as to what is being @@ -602,17 +526,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value, const void *Decoder) { const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); - LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); - if (SymbolLookUp) { - void *DisInfo = Dis->getDisInfoBlock(); - uint64_t ReferenceType; - ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load; - const char *ReferenceName; - (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName); - if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr || - ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) - (*Dis->CommentStream) << "literal pool for: " << ReferenceName; - } + Dis->tryAddingPcLoadReferenceComment(Value, Address); } // Thumb1 instructions don't have explicit S bits. Rather, they @@ -751,7 +665,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, "Asked to disassemble in Thumb mode but Subtarget is in ARM mode!"); // We want to read exactly 2 bytes of data. - if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1) { + if (Region.readBytes(Address, 2, bytes) == -1) { Size = 0; return MCDisassembler::Fail; } @@ -803,7 +717,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } // We want to read exactly 4 bytes of data. - if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) { + if (Region.readBytes(Address, 4, bytes) == -1) { Size = 0; return MCDisassembler::Fail; } @@ -920,6 +834,21 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, return S; } +static DecodeStatus +DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + if (RegNo == 15) + { + Inst.addOperand(MCOperand::CreateReg(ARM::APSR_NZCV)); + return MCDisassembler::Success; + } + + Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder)); + return S; +} + static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 7) @@ -927,6 +856,26 @@ static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); } +static const uint16_t GPRPairDecoderTable[] = { + ARM::R0_R1, ARM::R2_R3, ARM::R4_R5, ARM::R6_R7, + ARM::R8_R9, ARM::R10_R11, ARM::R12_SP +}; + +static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + if (RegNo > 13) + return MCDisassembler::Fail; + + if ((RegNo & 1) || RegNo == 0xe) + S = MCDisassembler::SoftFail; + + unsigned RegisterPair = GPRPairDecoderTable[RegNo/2]; + Inst.addOperand(MCOperand::CreateReg(RegisterPair)); + return S; +} + static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { unsigned Register = 0; @@ -1030,7 +979,7 @@ static const uint16_t QPRDecoderTable[] = { static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > 31) + if (RegNo > 31 || (RegNo & 1) != 0) return MCDisassembler::Fail; RegNo >>= 1; @@ -1206,7 +1155,7 @@
static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, } // Empty register lists are not allowed. - if (CountPopulation_32(Val) == 0) return MCDisassembler::Fail; + if (Val == 0) return MCDisassembler::Fail; for (unsigned i = 0; i < 16; ++i) { if (Val & (1 << i)) { if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder))) @@ -1227,6 +1176,13 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, unsigned Vd = fieldFromInstruction(Val, 8, 5); unsigned regs = fieldFromInstruction(Val, 0, 8); + // In case of unpredictable encoding, tweak the operands. + if (regs == 0 || (Vd + regs) > 32) { + regs = Vd + regs > 32 ? 32 - Vd : regs; + regs = std::max( 1u, regs); + S = MCDisassembler::SoftFail; + } + if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder))) return MCDisassembler::Fail; for (unsigned i = 0; i < (regs - 1); ++i) { @@ -1242,9 +1198,15 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, DecodeStatus S = MCDisassembler::Success; unsigned Vd = fieldFromInstruction(Val, 8, 5); - unsigned regs = fieldFromInstruction(Val, 0, 8); + unsigned regs = fieldFromInstruction(Val, 1, 7); - regs = regs >> 1; + // In case of unpredictable encoding, tweak the operands. + if (regs == 0 || regs > 16 || (Vd + regs) > 32) { + regs = Vd + regs > 32 ? 32 - Vd : regs; + regs = std::max( 1u, regs); + regs = std::min(16u, regs); + S = MCDisassembler::SoftFail; + } if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder))) return MCDisassembler::Fail; @@ -1797,6 +1759,29 @@ static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn, return S; } +static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned pred = fieldFromInstruction(Insn, 28, 4); + + if (pred == 0xF) + return DecodeCPSInstruction(Inst, Insn, Address, Decoder); + + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + return S; +} + static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { @@ -1807,6 +1792,7 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, unsigned reglist = fieldFromInstruction(Insn, 0, 16); if (pred == 0xF) { + // Ambiguous with RFE and SRS switch (Inst.getOpcode()) { case ARM::LDMDA: Inst.setOpcode(ARM::RFEDA); @@ -1857,11 +1843,16 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, Inst.setOpcode(ARM::SRSIB_UPD); break; default: - if (!Check(S, MCDisassembler::Fail)) return MCDisassembler::Fail; + return MCDisassembler::Fail; } // For stores (which become SRS's), the only operand is the mode.
if (fieldFromInstruction(Insn, 20, 1) == 0) { + // Check SRS encoding constraints + if (!(fieldFromInstruction(Insn, 22, 1) == 1 && + fieldFromInstruction(Insn, 20, 1) == 0)) + return MCDisassembler::Fail; + Inst.addOperand( MCOperand::CreateImm(fieldFromInstruction(Insn, 0, 4))); return S; @@ -1891,6 +1882,13 @@ static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, DecodeStatus S = MCDisassembler::Success; + // This decoder is called from multiple locations that do not check + // that the full encoding is valid before they do. + if (fieldFromInstruction(Insn, 5, 1) != 0 || + fieldFromInstruction(Insn, 16, 1) != 0 || + fieldFromInstruction(Insn, 20, 8) != 0x10) + return MCDisassembler::Fail; + // imod == '01' --> UNPREDICTABLE // NOTE: Even though this is technically UNPREDICTABLE, we choose to // return failure here. The '01' imod value is unprintable, so there's @@ -2432,6 +2430,57 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn, return S; } +static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned type = fieldFromInstruction(Insn, 8, 4); + unsigned align = fieldFromInstruction(Insn, 4, 2); + if (type == 6 && (align & 2)) return MCDisassembler::Fail; + if (type == 7 && (align & 2)) return MCDisassembler::Fail; + if (type == 10 && align == 3) return MCDisassembler::Fail; + + unsigned load = fieldFromInstruction(Insn, 21, 1); + return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder) + : DecodeVSTInstruction(Inst, Insn, Address, Decoder); +} + +static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned size = fieldFromInstruction(Insn, 6, 2); + if (size == 3) return MCDisassembler::Fail; + + unsigned type = fieldFromInstruction(Insn, 8, 4); + unsigned align = fieldFromInstruction(Insn, 4, 2); + if (type == 8 && align == 3) return MCDisassembler::Fail; + if (type == 9 && align == 3) return MCDisassembler::Fail; + + unsigned load = fieldFromInstruction(Insn, 21, 1); + return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder) + : DecodeVSTInstruction(Inst, Insn, Address, Decoder); +} + +static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned size = fieldFromInstruction(Insn, 6, 2); + if (size == 3) return MCDisassembler::Fail; + + unsigned align = fieldFromInstruction(Insn, 4, 2); + if (align & 2) return MCDisassembler::Fail; + + unsigned load = fieldFromInstruction(Insn, 21, 1); + return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder) + : DecodeVSTInstruction(Inst, Insn, Address, Decoder); +} + +static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned size = fieldFromInstruction(Insn, 6, 2); + if (size == 3) return MCDisassembler::Fail; + + unsigned load = fieldFromInstruction(Insn, 21, 1); + return load ?
DecodeVLDInstruction(Inst, Insn, Address, Decoder) + : DecodeVSTInstruction(Inst, Insn, Address, Decoder); +} + static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3536,6 +3585,15 @@ static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } +static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + if (Val & ~0xf) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (!Val) return MCDisassembler::Fail; @@ -3551,11 +3609,10 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, unsigned Rn = fieldFromInstruction(Insn, 16, 4); unsigned pred = fieldFromInstruction(Insn, 28, 4); - if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail; + if (Rn == 0xF) + S = MCDisassembler::SoftFail; - if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder))) + if (!Check(S, DecodeGPRPairRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -3565,7 +3622,6 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, return S; } - static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder){ DecodeStatus S = MCDisassembler::Success; @@ -3578,12 +3634,10 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; - if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail; - if (Rd == Rn || Rd == Rt || Rd == Rt+1) return MCDisassembler::Fail; + if (Rn == 0xF || Rd == Rn || Rd == Rt || Rd == Rt+1) + S = MCDisassembler::SoftFail; - if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder))) + if (!Check(S, DecodeGPRPairRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -4453,16 +4507,18 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, Vm |= (fieldFromInstruction(Insn, 5, 1) << 4); unsigned imm = fieldFromInstruction(Insn, 16, 6); unsigned cmode = fieldFromInstruction(Insn, 8, 4); + unsigned op = fieldFromInstruction(Insn, 5, 1); DecodeStatus S = MCDisassembler::Success; // VMOVv2f32 is ambiguous with these decodings. 
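// (The VLDSTn dispatchers above, like the rest of this file, are driven by
// fieldFromInstruction. Its behavior is sketched here for reference; the
// real helper lives with the generated decoder tables, and this signature
// is assumed from the call sites:)
static inline unsigned fieldFromInstruction(uint32_t Insn, unsigned StartBit,
                                            unsigned NumBits) {
  // Extract NumBits bits of Insn starting at StartBit. For example,
  // fieldFromInstruction(Insn, 8, 4) is the `type` field tested in
  // DecodeVLDST1Instruction, and fieldFromInstruction(Insn, 21, 1) is the
  // load/store bit that selects the VLD or VST decode path.
  assert(StartBit + NumBits <= 32 && "instruction field out of range");
  uint32_t Mask = (NumBits == 32) ? ~0u : ((1u << NumBits) - 1);
  return (Insn >> StartBit) & Mask;
}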
if (!(imm & 0x38) && cmode == 0xF) { + if (op == 1) return MCDisassembler::Fail; Inst.setOpcode(ARM::VMOVv2f32); return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder); } - if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail); + if (!(imm & 0x20)) return MCDisassembler::Fail; if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder))) return MCDisassembler::Fail; @@ -4481,16 +4537,18 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, Vm |= (fieldFromInstruction(Insn, 5, 1) << 4); unsigned imm = fieldFromInstruction(Insn, 16, 6); unsigned cmode = fieldFromInstruction(Insn, 8, 4); + unsigned op = fieldFromInstruction(Insn, 5, 1); DecodeStatus S = MCDisassembler::Success; // VMOVv4f32 is ambiguous with these decodings. if (!(imm & 0x38) && cmode == 0xF) { + if (op == 1) return MCDisassembler::Fail; Inst.setOpcode(ARM::VMOVv4f32); return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder); } - if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail); + if (!(imm & 0x20)) return MCDisassembler::Fail; if (!Check(S, DecodeQPRRegisterClass(Inst, Vd, Address, Decoder))) return MCDisassembler::Fail; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 3bcd083a35..7fef795b23 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -660,8 +660,8 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); uint32_t v = ~MO.getImm(); - int32_t lsb = CountTrailingZeros_32(v); - int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb; + int32_t lsb = countTrailingZeros(v); + int32_t width = (32 - countLeadingZeros (v)) - lsb; assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!"); O << markup("<imm:") << '#' << lsb << markup(">") << ", " @@ -674,6 +674,12 @@ void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum, O << ARM_MB::MemBOptToString(val); } +void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned val = MI->getOperand(OpNum).getImm(); + O << ARM_ISB::InstSyncBOptToString(val); +} + void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned ShiftOp = MI->getOperand(OpNum).getImm(); @@ -931,7 +937,7 @@ void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum, unsigned Mask = MI->getOperand(OpNum).getImm(); unsigned Firstcond = MI->getOperand(OpNum-1).getImm(); unsigned CondBit0 = Firstcond & 1; - unsigned NumTZ = CountTrailingZeros_32(Mask); + unsigned NumTZ = countTrailingZeros(Mask); assert(NumTZ <= 3 && "Invalid IT mask!"); for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { bool T = ((Mask >> Pos) & 1) == CondBit0; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 344104e873..5a6434886c 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -71,6 +71,7 @@ public: void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printInstSyncBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git 
a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index 62473b2bfd..b6c85c2e94 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -140,7 +140,7 @@ namespace ARM_AM { if ((Imm & ~255U) == 0) return 0; // Use CTZ to compute the rotate amount. - unsigned TZ = CountTrailingZeros_32(Imm); + unsigned TZ = countTrailingZeros(Imm); // Rotate amount must be even. Something like 0x200 must be rotated 8 bits, // not 9. @@ -153,7 +153,7 @@ namespace ARM_AM { // For values like 0xF000000F, we should ignore the low 6 bits, then // retry the hunt. if (Imm & 63U) { - unsigned TZ2 = CountTrailingZeros_32(Imm & ~63U); + unsigned TZ2 = countTrailingZeros(Imm & ~63U); unsigned RotAmt2 = TZ2 & ~1; if ((rotr32(Imm, RotAmt2) & ~255U) == 0) return (32-RotAmt2)&31; // HW rotates right, not left. @@ -221,7 +221,7 @@ namespace ARM_AM { if ((Imm & ~255U) == 0) return 0; // Use CTZ to compute the shift amount. - return CountTrailingZeros_32(Imm); + return countTrailingZeros(Imm); } /// isThumbImmShiftedVal - Return true if the specified value can be obtained @@ -240,7 +240,7 @@ namespace ARM_AM { if ((Imm & ~65535U) == 0) return 0; // Use CTZ to compute the shift amount. - return CountTrailingZeros_32(Imm); + return countTrailingZeros(Imm); } /// isThumbImm16ShiftedVal - Return true if the specified value can be @@ -296,7 +296,7 @@ namespace ARM_AM { /// encoding is possible. /// See ARM Reference Manual A6.3.2. static inline int getT2SOImmValRotateVal(unsigned V) { - unsigned RotAmt = CountLeadingZeros_32(V); + unsigned RotAmt = countLeadingZeros(V); if (RotAmt >= 24) return -1; @@ -328,7 +328,7 @@ namespace ARM_AM { static inline unsigned getT2SOImmValRotate(unsigned V) { if ((V & ~255U) == 0) return 0; // Use CTZ to compute the rotate amount. - unsigned RotAmt = CountTrailingZeros_32(V); + unsigned RotAmt = countTrailingZeros(V); return (32 - RotAmt) & 31; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index e66e985678..8baa3a6ce6 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -419,7 +419,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit; uint32_t imm10Bits = (offset & 0x1FF800) >> 11; uint32_t imm11Bits = (offset & 0x000007FF); - + uint32_t Binary = 0; uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits); uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | @@ -434,8 +434,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // four (see fixup_arm_thumb_cp). The 32-bit immediate value is encoded as // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:00) // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S). - // The value is encoded into disjoint bit positions in the destination - // opcode. x = unchanged, I = immediate value bit, S = sign extension bit, + // The value is encoded into disjoint bit positions in the destination + // opcode. x = unchanged, I = immediate value bit, S = sign extension bit, // J = either J1 or J2 bit, 0 = zero. 
// // BLX: xxxxxSIIIIIIIIII xxJxJIIIIIIIIII0 @@ -450,10 +450,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit; uint32_t imm10HBits = (offset & 0xFFC00) >> 10; uint32_t imm10LBits = (offset & 0x3FF); - + uint32_t Binary = 0; uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits); - uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | + uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | ((uint16_t)imm10LBits) << 1); Binary |= secondHalf << 16; Binary |= firstHalf; @@ -680,8 +680,11 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef return new DarwinARMAsmBackend(T, TT, CS); } - if (TheTriple.isOSWindows()) +#if 0 + // FIXME: Introduce yet another checker but assert(0). + if (TheTriple.isOSBinFormatCOFF()) assert(0 && "Windows not supported on ARM"); +#endif uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); return new ELFARMAsmBackend(T, TT, OSABI); diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index de48a0e0f3..ff9917d793 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -161,6 +161,49 @@ namespace ARM_MB { } } // namespace ARM_MB +namespace ARM_ISB { + enum InstSyncBOpt { + RESERVED_0 = 0, + RESERVED_1 = 1, + RESERVED_2 = 2, + RESERVED_3 = 3, + RESERVED_4 = 4, + RESERVED_5 = 5, + RESERVED_6 = 6, + RESERVED_7 = 7, + RESERVED_8 = 8, + RESERVED_9 = 9, + RESERVED_10 = 10, + RESERVED_11 = 11, + RESERVED_12 = 12, + RESERVED_13 = 13, + RESERVED_14 = 14, + SY = 15 + }; + + inline static const char *InstSyncBOptToString(unsigned val) { + switch (val) { + default: llvm_unreachable("Unknown memory operation"); + case RESERVED_0: return "#0x0"; + case RESERVED_1: return "#0x1"; + case RESERVED_2: return "#0x2"; + case RESERVED_3: return "#0x3"; + case RESERVED_4: return "#0x4"; + case RESERVED_5: return "#0x5"; + case RESERVED_6: return "#0x6"; + case RESERVED_7: return "#0x7"; + case RESERVED_8: return "#0x8"; + case RESERVED_9: return "#0x9"; + case RESERVED_10: return "#0xa"; + case RESERVED_11: return "#0xb"; + case RESERVED_12: return "#0xc"; + case RESERVED_13: return "#0xd"; + case RESERVED_14: return "#0xe"; + case SY: return "sy"; + } + } +} // namespace ARM_ISB + /// isARMLowRegister - Returns true if the register is a low register (r0-r7).
/// static inline bool isARMLowRegister(unsigned Reg) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 6c3d247668..679d3c4a85 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -203,7 +203,8 @@ private: void Reset(); void EmitPersonalityFixup(StringRef Name); - void CollectUnwindOpcodes(); + void FlushPendingOffset(); + void FlushUnwindOpcodes(bool AllowCompactModel0); void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags, SectionKind Kind, const MCSymbol &Fn); @@ -220,13 +221,14 @@ private: MCSymbol *ExTab; MCSymbol *FnStart; const MCSymbol *Personality; - uint32_t VFPRegSave; // Register mask for {d31-d0} - uint32_t RegSave; // Register mask for {r15-r0} - int64_t SPOffset; - uint16_t FPReg; - int64_t FPOffset; + unsigned PersonalityIndex; + unsigned FPReg; // Frame pointer register + int64_t FPOffset; // Offset: (final frame pointer) - (initial $sp) + int64_t SPOffset; // Offset: (final $sp) - (initial $sp) + int64_t PendingOffset; // Offset: (final $sp) - (emitted $sp) bool UsedFP; bool CantUnwind; + SmallVector<uint8_t, 64> Opcodes; UnwindOpcodeAssembler UnwindOpAsm; }; } // end anonymous namespace @@ -279,19 +281,18 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) { } void ARMELFStreamer::Reset() { - const MCRegisterInfo &MRI = getContext().getRegisterInfo(); - ExTab = NULL; FnStart = NULL; Personality = NULL; - VFPRegSave = 0; - RegSave = 0; - FPReg = MRI.getEncodingValue(ARM::SP); + PersonalityIndex = NUM_PERSONALITY_INDEX; + FPReg = ARM::SP; FPOffset = 0; SPOffset = 0; + PendingOffset = 0; UsedFP = false; CantUnwind = false; + Opcodes.clear(); UnwindOpAsm.Reset(); } @@ -311,18 +312,6 @@ void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) { MCFixup::getKindForSize(4, false))); } -void ARMELFStreamer::CollectUnwindOpcodes() { - if (UsedFP) { - UnwindOpAsm.EmitSetFP(FPReg); - UnwindOpAsm.EmitSPOffset(-FPOffset); - } else { - UnwindOpAsm.EmitSPOffset(SPOffset); - } - UnwindOpAsm.EmitVFPRegSave(VFPRegSave); - UnwindOpAsm.EmitRegSave(RegSave); - UnwindOpAsm.Finalize(); -} - void ARMELFStreamer::EmitFnStart() { assert(FnStart == 0); FnStart = getContext().CreateTempSymbol(); @@ -333,27 +322,12 @@ void ARMELFStreamer::EmitFnEnd() { assert(FnStart && ".fnstart must preceeds .fnend"); // Emit unwind opcodes if there is no .handlerdata directive - if (!ExTab && !CantUnwind) { - CollectUnwindOpcodes(); - - unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex(); - if (PersonalityIndex == AEABI_UNWIND_CPP_PR1 || - PersonalityIndex == AEABI_UNWIND_CPP_PR2) { - // For the __aeabi_unwind_cpp_pr1 and __aeabi_unwind_cpp_pr2, we have to - // emit the unwind opcodes in the corresponding ".ARM.extab" section, and - // then emit a reference to these unwind opcodes in the second word of - // the exception index table entry. - SwitchToExTabSection(*FnStart); - ExTab = getContext().CreateTempSymbol(); - EmitLabel(ExTab); - EmitBytes(UnwindOpAsm.data(), 0); - } - } + if (!ExTab && !CantUnwind) + FlushUnwindOpcodes(true); // Emit the exception index table entry SwitchToExIdxSection(*FnStart); - unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex(); if (PersonalityIndex < NUM_PERSONALITY_INDEX) EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex)); @@ -379,11 +353,15 @@ void ARMELFStreamer::EmitFnEnd() { // opcodes should always be 4 bytes. 
assert(PersonalityIndex == AEABI_UNWIND_CPP_PR0 && "Compact model must use __aeabi_cpp_unwind_pr0 as personality"); - assert(UnwindOpAsm.size() == 4u && + assert(Opcodes.size() == 4u && "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be equal to 4"); - EmitBytes(UnwindOpAsm.data(), 0); + EmitBytes(StringRef(reinterpret_cast<const char*>(Opcodes.data()), + Opcodes.size()), 0); } + // Switch to the section containing FnStart + SwitchSection(&FnStart->getSection()); + // Clean exception handling frame information Reset(); } @@ -392,7 +370,34 @@ void ARMELFStreamer::EmitCantUnwind() { CantUnwind = true; } -void ARMELFStreamer::EmitHandlerData() { +void ARMELFStreamer::FlushPendingOffset() { + if (PendingOffset != 0) { + UnwindOpAsm.EmitSPOffset(-PendingOffset); + PendingOffset = 0; + } +} + +void ARMELFStreamer::FlushUnwindOpcodes(bool AllowCompactModel0) { + // Emit the unwind opcode to restore $sp. + if (UsedFP) { + const MCRegisterInfo &MRI = getContext().getRegisterInfo(); + int64_t LastRegSaveSPOffset = SPOffset - PendingOffset; + UnwindOpAsm.EmitSPOffset(LastRegSaveSPOffset - FPOffset); + UnwindOpAsm.EmitSetSP(MRI.getEncodingValue(FPReg)); + } else { + FlushPendingOffset(); + } + + // Finalize the unwind opcode sequence + UnwindOpAsm.Finalize(PersonalityIndex, Opcodes); + + // For compact model 0, we have to emit the unwind opcodes in the .ARM.exidx + // section. Thus, we don't have to create an entry in the .ARM.extab + // section. + if (AllowCompactModel0 && PersonalityIndex == AEABI_UNWIND_CPP_PR0) + return; + + // Switch to .ARM.extab section. SwitchToExTabSection(*FnStart); // Create .ARM.extab label for offset in .ARM.exidx @@ -400,19 +405,23 @@ void ARMELFStreamer::EmitHandlerData() { ExTab = getContext().CreateTempSymbol(); EmitLabel(ExTab); - // Emit Personality - assert(Personality && ".personality directive must preceed .handlerdata"); - - const MCSymbolRefExpr *PersonalityRef = - MCSymbolRefExpr::Create(Personality, - MCSymbolRefExpr::VK_ARM_PREL31, - getContext()); + // Emit personality + if (Personality) { + const MCSymbolRefExpr *PersonalityRef = + MCSymbolRefExpr::Create(Personality, + MCSymbolRefExpr::VK_ARM_PREL31, + getContext()); - EmitValue(PersonalityRef, 4, 0); + EmitValue(PersonalityRef, 4, 0); + } // Emit unwind opcodes - CollectUnwindOpcodes(); - EmitBytes(UnwindOpAsm.data(), 0); + EmitBytes(StringRef(reinterpret_cast<const char *>(Opcodes.data()), + Opcodes.size()), 0); +} + +void ARMELFStreamer::EmitHandlerData() { + FlushUnwindOpcodes(false); } void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) { @@ -423,42 +432,55 @@ void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) { void ARMELFStreamer::EmitSetFP(unsigned NewFPReg, unsigned NewSPReg, int64_t Offset) { - assert(SPOffset == 0 && - "Current implementation assumes .setfp precedes .pad"); - - const MCRegisterInfo &MRI = getContext().getRegisterInfo(); - - uint16_t NewFPRegEncVal = MRI.getEncodingValue(NewFPReg); -#ifndef NDEBUG - uint16_t NewSPRegEncVal = MRI.getEncodingValue(NewSPReg); -#endif - - assert((NewSPReg == ARM::SP || NewSPRegEncVal == FPReg) && + assert((NewSPReg == ARM::SP || NewSPReg == FPReg) && "the operand of .setfp directive should be either $sp or $fp"); UsedFP = true; - FPReg = NewFPRegEncVal; - FPOffset = Offset; + FPReg = NewFPReg; + + if (NewSPReg == ARM::SP) + FPOffset = SPOffset + Offset; + else + FPOffset += Offset; } void ARMELFStreamer::EmitPad(int64_t Offset) { - SPOffset += Offset; + // Track the change of the $sp offset + SPOffset -= Offset; + + // To squash 
multiple .pad directives, we should delay the unwind opcode + until the .save, .vsave, .handlerdata, or .fnend directives. + PendingOffset -= Offset; } void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool IsVector) { + // Collect the registers in the register list + unsigned Count = 0; + uint32_t Mask = 0; const MCRegisterInfo &MRI = getContext().getRegisterInfo(); - -#ifndef NDEBUG - unsigned Max = IsVector ? 32 : 16; -#endif - uint32_t &RegMask = IsVector ? VFPRegSave : RegSave; - for (size_t i = 0; i < RegList.size(); ++i) { unsigned Reg = MRI.getEncodingValue(RegList[i]); - assert(Reg < Max && "Register encoded value out of range"); - RegMask |= 1u << Reg; + assert(Reg < (IsVector ? 32U : 16U) && "Register out of range"); + unsigned Bit = (1u << Reg); + if ((Mask & Bit) == 0) { + Mask |= Bit; + ++Count; + } } + + // Track the change of the $sp offset: For the .save directive, the + // corresponding push instruction will decrease the $sp by (4 * Count). + // For the .vsave directive, the corresponding vpush instruction will + // decrease $sp by (8 * Count). + SPOffset -= Count * (IsVector ? 8 : 4); + + // Emit the opcode + FlushPendingOffset(); + if (IsVector) + UnwindOpAsm.EmitVFPRegSave(Mask); + else + UnwindOpAsm.EmitRegSave(Mask); } namespace llvm { diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 7a59a7dd50..2aa1010217 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -1359,8 +1359,8 @@ getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op, // msb of the mask. const MCOperand &MO = MI.getOperand(Op); uint32_t v = ~MO.getImm(); - uint32_t lsb = CountTrailingZeros_32(v); - uint32_t msb = (32 - CountLeadingZeros_32 (v)) - 1; + uint32_t lsb = countTrailingZeros(v); + uint32_t msb = (32 - countLeadingZeros (v)) - 1; assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!"); return lsb | (msb << 5); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index f09fb5a94f..14fd03fad8 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -61,6 +61,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { unsigned SubVer = TT[Idx]; if (SubVer >= '7' && SubVer <= '9') { if (Len >= Idx+2 && TT[Idx+1] == 'm') { + isThumb = true; if (NoCPU) // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass"; @@ -99,6 +100,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') ARMArchFeature = "+v6t2"; else if (Len >= Idx+2 && TT[Idx+1] == 'm') { + isThumb = true; if (NoCPU) // v6m: FeatureNoARM, FeatureMClass ARMArchFeature = "+v6,+noarm,+mclass"; @@ -159,7 +161,7 @@ static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) { return X; } -static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) { +static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); if (TheTriple.isOSDarwin()) @@ -212,6 +214,15 @@ static MCInstPrinter *createARMMCInstPrinter(const Target &T, return 0; } +static MCRelocationInfo *createARMMCRelocationInfo(StringRef TT, + MCContext &Ctx) { + Triple TheTriple(TT); + if (TheTriple.isEnvironmentMachO()) + return createARMMachORelocationInfo(Ctx); + // Default to the stock relocation info. 
+ return llvm::createMCRelocationInfo(TT, Ctx); +} + namespace { class ARMMCInstrAnalysis : public MCInstrAnalysis { @@ -232,15 +243,16 @@ public: return MCInstrAnalysis::isConditionalBranch(Inst); } - uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr, - uint64_t Size) const { + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size, uint64_t &Target) const { // We only handle PCRel branches for now. if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL) - return -1ULL; + return false; int64_t Imm = Inst.getOperand(0).getImm(); // FIXME: This is not right for thumb. - return Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes. + Target = Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes. + return true; } }; @@ -295,4 +307,10 @@ extern "C" void LLVMInitializeARMTargetMC() { // Register the MCInstPrinter. TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter); TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter); + + // Register the MC relocation info. + TargetRegistry::RegisterMCRelocationInfo(TheARMTarget, + createARMMCRelocationInfo); + TargetRegistry::RegisterMCRelocationInfo(TheThumbTarget, + createARMMCRelocationInfo); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index a89981e4f0..4e94c5341b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -25,6 +25,7 @@ class MCInstrInfo; class MCObjectWriter; class MCRegisterInfo; class MCSubtargetInfo; +class MCRelocationInfo; class StringRef; class Target; class raw_ostream; @@ -58,6 +59,9 @@ MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, uint32_t CPUType, uint32_t CPUSubtype); + +/// createARMMachORelocationInfo - Construct ARM Mach-O relocation info. +MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx); } // End llvm namespace // Defines symbolic names for ARM registers. This defines a mapping from diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp new file mode 100644 index 0000000000..807c9483bc --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp @@ -0,0 +1,43 @@ +//===-- ARMMachORelocationInfo.cpp ----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "ARMMCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCRelocationInfo.h" +#include "llvm-c/Disassembler.h" + +using namespace llvm; +using namespace object; + +namespace { +class ARMMachORelocationInfo : public MCRelocationInfo { +public: + ARMMachORelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {} + + const MCExpr *createExprForCAPIVariantKind(const MCExpr *SubExpr, + unsigned VariantKind) { + switch(VariantKind) { + case LLVMDisassembler_VariantKind_ARM_HI16: + return ARMMCExpr::CreateUpper16(SubExpr, Ctx); + case LLVMDisassembler_VariantKind_ARM_LO16: + return ARMMCExpr::CreateLower16(SubExpr, Ctx); + default: + return MCRelocationInfo::createExprForCAPIVariantKind(SubExpr, + VariantKind); + } + } +}; +} // End unnamed namespace + +/// createARMMachORelocationInfo - Construct an ARM Mach-O RelocationInfo. 
+MCRelocationInfo *llvm::createARMMachORelocationInfo(MCContext &Ctx) { + return new ARMMachORelocationInfo(Ctx); +} diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp index 191db69fbc..c943370818 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp @@ -20,6 +20,48 @@ using namespace llvm; +namespace { + /// UnwindOpcodeStreamer - The simple wrapper over SmallVector to emit bytes + /// with MSB to LSB per uint32_t ordering. For example, the first byte will + /// be placed in Vec[3], and the following bytes will be placed in 2, 1, 0, + /// 7, 6, 5, 4, 11, 10, 9, 8, and so on. + class UnwindOpcodeStreamer { + private: + SmallVectorImpl<uint8_t> &Vec; + size_t Pos; + + public: + UnwindOpcodeStreamer(SmallVectorImpl<uint8_t> &V) : Vec(V), Pos(3) { + } + + /// Emit the byte in MSB to LSB per uint32_t order. + inline void EmitByte(uint8_t elem) { + Vec[Pos] = elem; + Pos = (((Pos ^ 0x3u) + 1) ^ 0x3u); + } + + /// Emit the size prefix. + inline void EmitSize(size_t Size) { + size_t SizeInWords = (Size + 3) / 4; + assert(SizeInWords <= 0x100u && + "Only 256 additional words are allowed for unwind opcodes"); + EmitByte(static_cast<uint8_t>(SizeInWords - 1)); + } + + /// Emit the personality index prefix. + inline void EmitPersonalityIndex(unsigned PI) { + assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix"); + EmitByte(EHT_COMPACT | PI); + } + + /// Fill the rest of bytes with FINISH opcode. + inline void FillFinishOpcode() { + while (Pos < Vec.size()) + EmitByte(UNWIND_OPCODE_FINISH); + } + }; +} + void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) { if (RegSave == 0u) return; @@ -43,28 +85,22 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) { uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask); if (UnmaskedReg == 0u) { // Pop r[4 : (4 + n)] - Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range); + EmitInt8(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range); RegSave &= 0x000fu; } else if (UnmaskedReg == (1u << 14)) { // Pop r[14] + r[4 : (4 + n)] - Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range); + EmitInt8(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range); RegSave &= 0x000fu; } } // Two bytes opcode to save register r15-r4 - if ((RegSave & 0xfff0u) != 0) { - uint32_t Op = UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4); - Ops.push_back(static_cast<uint8_t>(Op >> 8)); - Ops.push_back(static_cast<uint8_t>(Op & 0xff)); - } + if ((RegSave & 0xfff0u) != 0) + EmitInt16(UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4)); // Opcode to save register r3-r0 - if ((RegSave & 0x000fu) != 0) { - uint32_t Op = UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu); - Ops.push_back(static_cast<uint8_t>(Op >> 8)); - Ops.push_back(static_cast<uint8_t>(Op & 0xff)); - } + if ((RegSave & 0x000fu) != 0) + EmitInt16(UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu)); } /// Emit unwind opcodes for .vsave directives @@ -89,10 +125,8 @@ void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) { Bit >>= 1; } - uint32_t Op = - UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | ((i - 16) << 4) | Range; - Ops.push_back(static_cast<uint8_t>(Op >> 8)); - Ops.push_back(static_cast<uint8_t>(Op & 0xff)); + EmitInt16(UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | + ((i - 16) << 4) | Range); } while (i > 0) { @@ -113,86 +147,75 @@ void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) { Bit >>= 1; } - uint32_t Op = UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range; - 
Ops.push_back(static_cast<uint8_t>(Op >> 8)); - Ops.push_back(static_cast<uint8_t>(Op & 0xff)); + EmitInt16(UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range); } } -/// Emit unwind opcodes for .setfp directives -void UnwindOpcodeAssembler::EmitSetFP(uint16_t FPReg) { - Ops.push_back(UNWIND_OPCODE_SET_VSP | FPReg); +/// Emit unwind opcodes to copy address from source register to $sp. +void UnwindOpcodeAssembler::EmitSetSP(uint16_t Reg) { + EmitInt8(UNWIND_OPCODE_SET_VSP | Reg); } -/// Emit unwind opcodes to update stack pointer +/// Emit unwind opcodes to add $sp with an offset. void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) { if (Offset > 0x200) { - uint8_t Buff[10]; - size_t Size = encodeULEB128((Offset - 0x204) >> 2, Buff); - Ops.push_back(UNWIND_OPCODE_INC_VSP_ULEB128); - Ops.append(Buff, Buff + Size); + uint8_t Buff[16]; + Buff[0] = UNWIND_OPCODE_INC_VSP_ULEB128; + size_t ULEBSize = encodeULEB128((Offset - 0x204) >> 2, Buff + 1); + EmitBytes(Buff, ULEBSize + 1); } else if (Offset > 0) { if (Offset > 0x100) { - Ops.push_back(UNWIND_OPCODE_INC_VSP | 0x3fu); + EmitInt8(UNWIND_OPCODE_INC_VSP | 0x3fu); Offset -= 0x100; } - Ops.push_back(UNWIND_OPCODE_INC_VSP | - static_cast<uint8_t>((Offset - 4) >> 2)); + EmitInt8(UNWIND_OPCODE_INC_VSP | static_cast<uint8_t>((Offset - 4) >> 2)); } else if (Offset < 0) { while (Offset < -0x100) { - Ops.push_back(UNWIND_OPCODE_DEC_VSP | 0x3fu); + EmitInt8(UNWIND_OPCODE_DEC_VSP | 0x3fu); Offset += 0x100; } - Ops.push_back(UNWIND_OPCODE_DEC_VSP | - static_cast<uint8_t>(((-Offset) - 4) >> 2)); + EmitInt8(UNWIND_OPCODE_DEC_VSP | + static_cast<uint8_t>(((-Offset) - 4) >> 2)); } } -void UnwindOpcodeAssembler::AddOpcodeSizePrefix(size_t Pos) { - size_t SizeInWords = (size() + 3) / 4; - assert(SizeInWords <= 0x100u && - "Only 256 additional words are allowed for unwind opcodes"); - Ops[Pos] = static_cast<uint8_t>(SizeInWords - 1); -} +void UnwindOpcodeAssembler::Finalize(unsigned &PersonalityIndex, + SmallVectorImpl<uint8_t> &Result) { -void UnwindOpcodeAssembler::AddPersonalityIndexPrefix(size_t Pos, unsigned PI) { - assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix"); - Ops[Pos] = EHT_COMPACT | PI; -} + UnwindOpcodeStreamer OpStreamer(Result); -void UnwindOpcodeAssembler::EmitFinishOpcodes() { - for (size_t i = (0x4u - (size() & 0x3u)) & 0x3u; i > 0; --i) - Ops.push_back(UNWIND_OPCODE_FINISH); -} - -void UnwindOpcodeAssembler::Finalize() { if (HasPersonality) { - // Personality specified by .personality directive - Offset = 1; - AddOpcodeSizePrefix(1); + // User-specified personality routine: [ SIZE , OP1 , OP2 , ... ] + PersonalityIndex = NUM_PERSONALITY_INDEX; + size_t TotalSize = Ops.size() + 1; + size_t RoundUpSize = (TotalSize + 3) / 4 * 4; + Result.resize(RoundUpSize); + OpStreamer.EmitSize(RoundUpSize); } else { - if (getOpcodeSize() <= 3) { + if (Ops.size() <= 3) { // __aeabi_unwind_cpp_pr0: [ 0x80 , OP1 , OP2 , OP3 ] - Offset = 1; PersonalityIndex = AEABI_UNWIND_CPP_PR0; - AddPersonalityIndexPrefix(Offset, PersonalityIndex); + Result.resize(4); + OpStreamer.EmitPersonalityIndex(PersonalityIndex); } else { // __aeabi_unwind_cpp_pr1: [ 0x81 , SIZE , OP1 , OP2 , ... 
] - Offset = 0; PersonalityIndex = AEABI_UNWIND_CPP_PR1; - AddPersonalityIndexPrefix(Offset, PersonalityIndex); - AddOpcodeSizePrefix(1); + size_t TotalSize = Ops.size() + 2; + size_t RoundUpSize = (TotalSize + 3) / 4 * 4; + Result.resize(RoundUpSize); + OpStreamer.EmitPersonalityIndex(PersonalityIndex); + OpStreamer.EmitSize(RoundUpSize); } } - // Emit the padding finish opcodes if the size() is not multiple of 4. - EmitFinishOpcodes(); + // Copy the unwind opcodes + for (size_t i = OpBegins.size() - 1; i > 0; --i) + for (size_t j = OpBegins[i - 1], end = OpBegins[i]; j < end; ++j) + OpStreamer.EmitByte(Ops[j]); - // Swap the byte order - uint8_t *Ptr = Ops.begin() + Offset; - assert(size() % 4 == 0 && "Final unwind opcodes should align to 4"); - for (size_t i = 0, n = size(); i < n; i += 4) { - std::swap(Ptr[i], Ptr[i + 3]); - std::swap(Ptr[i + 1], Ptr[i + 2]); - } + // Emit the padding finish opcodes if the size is not multiple of 4. + OpStreamer.FillFinishOpcode(); + + // Reset the assembler state + Reset(); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h index f6ecaeb8b2..ac67c6efab 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h +++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h @@ -27,86 +27,61 @@ class MCSymbol; class UnwindOpcodeAssembler { private: - llvm::SmallVector<uint8_t, 8> Ops; - - unsigned Offset; - unsigned PersonalityIndex; + llvm::SmallVector<uint8_t, 32> Ops; + llvm::SmallVector<unsigned, 8> OpBegins; bool HasPersonality; - enum { - // The number of bytes to be preserved for the size and personality index - // prefix of unwind opcodes. - NUM_PRESERVED_PREFIX_BUF = 2 - }; - public: UnwindOpcodeAssembler() - : Ops(NUM_PRESERVED_PREFIX_BUF), Offset(NUM_PRESERVED_PREFIX_BUF), - PersonalityIndex(NUM_PERSONALITY_INDEX), HasPersonality(0) { + : HasPersonality(0) { + OpBegins.push_back(0); } /// Reset the unwind opcode assembler. void Reset() { - Ops.resize(NUM_PRESERVED_PREFIX_BUF); - Offset = NUM_PRESERVED_PREFIX_BUF; - PersonalityIndex = NUM_PERSONALITY_INDEX; + Ops.clear(); + OpBegins.clear(); + OpBegins.push_back(0); HasPersonality = 0; } - /// Get the size of the payload (including the size byte) - size_t size() const { - return Ops.size() - Offset; - } - - /// Get the beginning of the payload - const uint8_t *begin() const { - return Ops.begin() + Offset; - } - - /// Get the payload - StringRef data() const { - return StringRef(reinterpret_cast<const char *>(begin()), size()); - } - /// Set the personality index void setPersonality(const MCSymbol *Per) { HasPersonality = 1; } - /// Get the personality index - unsigned getPersonalityIndex() const { - return PersonalityIndex; - } - /// Emit unwind opcodes for .save directives void EmitRegSave(uint32_t RegSave); /// Emit unwind opcodes for .vsave directives void EmitVFPRegSave(uint32_t VFPRegSave); - /// Emit unwind opcodes for .setfp directives - void EmitSetFP(uint16_t FPReg); + /// Emit unwind opcodes to copy address from source register to $sp. + void EmitSetSP(uint16_t Reg); - /// Emit unwind opcodes to update stack pointer + /// Emit unwind opcodes to add $sp with an offset. void EmitSPOffset(int64_t Offset); /// Finalize the unwind opcode sequence for EmitBytes() - void Finalize(); + void Finalize(unsigned &PersonalityIndex, + SmallVectorImpl<uint8_t> &Result); private: - /// Get the size of the opcodes in bytes. 
- size_t getOpcodeSize() const { - return Ops.size() - NUM_PRESERVED_PREFIX_BUF; + void EmitInt8(unsigned Opcode) { + Ops.push_back(Opcode & 0xff); + OpBegins.push_back(OpBegins.back() + 1); } - /// Add the length prefix to the payload - void AddOpcodeSizePrefix(size_t Pos); - - /// Add personality index prefix in some compact format - void AddPersonalityIndexPrefix(size_t Pos, unsigned PersonalityIndex); + void EmitInt16(unsigned Opcode) { + Ops.push_back((Opcode >> 8) & 0xff); + Ops.push_back(Opcode & 0xff); + OpBegins.push_back(OpBegins.back() + 2); + } - /// Fill the words with finish opcode if it is not aligned - void EmitFinishOpcodes(); + void EmitBytes(const uint8_t *Opcode, size_t Size) { + Ops.insert(Ops.end(), Opcode, Opcode + Size); + OpBegins.push_back(OpBegins.back() + Size); + } }; } // namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index a7ac5ca061..bab59f41c9 100644 --- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMARMDesc ARMMachObjectWriter.cpp ARMELFObjectWriter.cpp ARMUnwindOpAsm.cpp + ARMMachORelocationInfo.cpp ) add_dependencies(LLVMARMDesc ARMCommonTableGen) diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 1e2a8b03e1..db49db8ca3 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -88,7 +88,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -249,7 +250,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); unsigned FramePtr = RegInfo->getFrameRegister(MF); diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 095736d52a..22a925e0ff 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -22,7 +22,7 @@ using namespace llvm; Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI(*this, STI) { + : ARMBaseInstrInfo(STI), RI(STI) { } /// getNoopForMachoTarget - Return the noop instruction to use for a noop. 
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 7452fb776e..6722614027 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -40,9 +40,8 @@ extern cl::opt<bool> ReuseFrameIndexVals; using namespace llvm; -Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, - const ARMSubtarget &sti) - : ARMBaseRegisterInfo(tii, sti) { +Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMSubtarget &sti) + : ARMBaseRegisterInfo(sti) { } const TargetRegisterClass* @@ -70,6 +69,7 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); @@ -488,6 +488,9 @@ void Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, unsigned BaseReg, int64_t Offset) const { MachineInstr &MI = *I; + const ARMBaseInstrInfo &TII = + *static_cast<const ARMBaseInstrInfo*>( + MI.getParent()->getParent()->getTarget().getInstrInfo()); int Off = Offset; // ARM doesn't need the general 64-bit offsets unsigned i = 0; @@ -513,6 +516,7 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, // off the frame pointer (if, for example, there are alloca() calls in // the function, the offset will be negative. Use R12 instead since that's // a call clobbered register that we know won't be used in Thumb1 mode. + const TargetInstrInfo &TII = *MBB.getParent()->getTarget().getInstrInfo(); DebugLoc DL; AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr)) .addReg(ARM::R12, RegState::Define) @@ -558,6 +562,8 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const ARMBaseInstrInfo &TII = + *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc dl = MI.getDebugLoc(); MachineInstrBuilder MIB(*MBB.getParent(), &MI); diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index ebbab36dd7..9689b23146 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -25,7 +25,7 @@ namespace llvm { struct Thumb1RegisterInfo : public ARMBaseRegisterInfo { public: - Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); + Thumb1RegisterInfo(const ARMSubtarget &STI); const TargetRegisterClass* getLargestLegalSuperClass(const TargetRegisterClass *RC) const; diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 97c254ce75..d8596d7993 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -73,15 +73,15 @@ static void TrackDefUses(MachineInstr *MI, for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { unsigned Reg = LocalUses[i]; - Uses.insert(Reg); - for (MCSubRegIterator Subreg(Reg, TRI); Subreg.isValid(); ++Subreg) + for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true); + Subreg.isValid(); ++Subreg) Uses.insert(*Subreg); } for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { unsigned Reg = LocalDefs[i]; - Defs.insert(Reg); - for (MCSubRegIterator Subreg(Reg, TRI); Subreg.isValid(); ++Subreg) + for 
(MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true); + Subreg.isValid(); ++Subreg) Defs.insert(*Subreg); if (Reg == ARM::CPSR) continue; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index a1b48c226a..286eaa0946 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -31,7 +31,7 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden, cl::init(false)); Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI(*this, STI) { + : ARMBaseInstrInfo(STI), RI(STI) { } /// getNoopForMachoTarget - Return the noop instruction to use for a noop. @@ -285,7 +285,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, NumBytes = 0; } else { // FIXME: Move this to ARMAddressingModes.h? - unsigned RotAmt = CountLeadingZeros_32(ThisVal); + unsigned RotAmt = countLeadingZeros(ThisVal); ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt); NumBytes &= ~ThisVal; assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 && @@ -302,7 +302,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, NumBytes = 0; } else { // FIXME: Move this to ARMAddressingModes.h? - unsigned RotAmt = CountLeadingZeros_32(ThisVal); + unsigned RotAmt = countLeadingZeros(ThisVal); ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt); NumBytes &= ~ThisVal; assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 && @@ -484,7 +484,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // Otherwise, extract 8 adjacent bits from the immediate into this // t2ADDri/t2SUBri. - unsigned RotAmt = CountLeadingZeros_32(Offset); + unsigned RotAmt = countLeadingZeros<unsigned>(Offset); unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xff000000U, RotAmt); // We will handle these bits from offset, clear them. diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 1a7a4d450c..4cb827f308 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -24,9 +24,8 @@ #include "llvm/IR/Function.h" using namespace llvm; -Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, - const ARMSubtarget &sti) - : ARMBaseRegisterInfo(tii, sti) { +Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMSubtarget &sti) + : ARMBaseRegisterInfo(sti) { } /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate.
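(A closing note on the countLeadingZeros changes in emitT2RegPlusImmediate and rewriteT2FrameIndex above: both rely on the same "peel off 8 adjacent bits" trick, since a Thumb2 modified immediate encodes an 8-bit chunk at an arbitrary position. The sketch below is an editorial illustration, not LLVM code: clz32 stands in for llvm::countLeadingZeros, rotr32 mirrors ARM_AM::rotr32, and the example offset is made up.)

#include <cstdint>
#include <cstdio>

static uint32_t rotr32(uint32_t V, unsigned Amt) {
  Amt &= 31;
  return Amt ? (V >> Amt) | (V << (32 - Amt)) : V;
}

static unsigned clz32(uint32_t V) { // portable countLeadingZeros
  unsigned N = 0;
  for (uint32_t Bit = 0x80000000u; Bit && !(V & Bit); Bit >>= 1)
    ++N;
  return N;
}

int main() {
  uint32_t NumBytes = 0x12345; // an offset too large for one instruction
  while (NumBytes) {
    // Align an 8-bit mask at the leading set bit and peel that chunk off;
    // each chunk is encodable by one t2ADDri/t2SUBri modified immediate.
    unsigned RotAmt = clz32(NumBytes);
    uint32_t ThisVal = NumBytes & rotr32(0xff000000u, RotAmt);
    std::printf("add chunk #0x%x\n", (unsigned)ThisVal);
    NumBytes &= ~ThisVal; // clear the bits this instruction handled
  }
  return 0;
}

Each peeled chunk is exactly what the asserts in the hunks above verify via ARM_AM::getT2SOImmVal, and since every iteration clears the top eight significant bits, the loop terminates after at most four iterations for a 32-bit offset.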