aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/ARM64/ARM64FrameLowering.cpp
diff options
context:
space:
mode:
authorStephen Hines <srhines@google.com>2014-05-29 02:49:00 -0700
committerStephen Hines <srhines@google.com>2014-05-29 02:49:00 -0700
commitdce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch)
treedcebc53f2b182f145a2e659393bf9a0472cedf23 /lib/Target/ARM64/ARM64FrameLowering.cpp
parent220b921aed042f9e520c26cffd8282a94c66c3d5 (diff)
downloadexternal_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.gz
external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.bz2
external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.zip
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'lib/Target/ARM64/ARM64FrameLowering.cpp')
-rw-r--r--lib/Target/ARM64/ARM64FrameLowering.cpp816
1 files changed, 0 insertions, 816 deletions
diff --git a/lib/Target/ARM64/ARM64FrameLowering.cpp b/lib/Target/ARM64/ARM64FrameLowering.cpp
deleted file mode 100644
index 798986ca90..0000000000
--- a/lib/Target/ARM64/ARM64FrameLowering.cpp
+++ /dev/null
@@ -1,816 +0,0 @@
-//===- ARM64FrameLowering.cpp - ARM64 Frame Lowering -----------*- C++ -*-====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the ARM64 implementation of TargetFrameLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "frame-info"
-#include "ARM64FrameLowering.h"
-#include "ARM64InstrInfo.h"
-#include "ARM64MachineFunctionInfo.h"
-#include "ARM64Subtarget.h"
-#include "ARM64TargetMachine.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-static cl::opt<bool> EnableRedZone("arm64-redzone",
- cl::desc("enable use of redzone on ARM64"),
- cl::init(false), cl::Hidden);
-
-STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
-
-static unsigned estimateStackSize(MachineFunction &MF) {
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- int Offset = 0;
- for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -FFI->getObjectOffset(i);
- if (FixedOff > Offset)
- Offset = FixedOff;
- }
- for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
- if (FFI->isDeadObjectIndex(i))
- continue;
- Offset += FFI->getObjectSize(i);
- unsigned Align = FFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset + Align - 1) / Align * Align;
- }
- // This does not include the 16 bytes used for fp and lr.
- return (unsigned)Offset;
-}
-
-bool ARM64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
- if (!EnableRedZone)
- return false;
- // Don't use the red zone if the function explicitly asks us not to.
- // This is typically used for kernel code.
- if (MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::NoRedZone))
- return false;
-
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
- unsigned NumBytes = AFI->getLocalStackSize();
-
- // Note: currently hasFP() is always true for hasCalls(), but that's an
- // implementation detail of the current code, not a strict requirement,
- // so stay safe here and check both.
- if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128)
- return false;
- return true;
-}
-
-/// hasFP - Return true if the specified function should have a dedicated frame
-/// pointer register.
-bool ARM64FrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
-#ifndef NDEBUG
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
- assert(!RegInfo->needsStackRealignment(MF) &&
- "No stack realignment on ARM64!");
-#endif
-
- return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken());
-}
-
-/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
-/// not required, we reserve argument space for call sites in the function
-/// immediately on entry to the current function. This eliminates the need for
-/// add/sub sp brackets around call sites. Returns true if the call frame is
-/// included as part of the stack frame.
-bool ARM64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
- return !MF.getFrameInfo()->hasVarSizedObjects();
-}
-
-void ARM64FrameLowering::eliminateCallFramePseudoInstr(
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- const ARM64InstrInfo *TII =
- static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
- if (!TFI->hasReservedCallFrame(MF)) {
- // If we have alloca, convert as follows:
- // ADJCALLSTACKDOWN -> sub, sp, sp, amount
- // ADJCALLSTACKUP -> add, sp, sp, amount
- MachineInstr *Old = I;
- DebugLoc DL = Old->getDebugLoc();
- unsigned Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount + Align - 1) / Align * Align;
-
- // Replace the pseudo instruction with a new instruction...
- unsigned Opc = Old->getOpcode();
- if (Opc == ARM64::ADJCALLSTACKDOWN) {
- emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, -Amount, TII);
- } else {
- assert(Opc == ARM64::ADJCALLSTACKUP && "expected ADJCALLSTACKUP");
- emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, Amount, TII);
- }
- }
- }
- MBB.erase(I);
-}
-
-void
-ARM64FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned FramePtr) const {
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineModuleInfo &MMI = MF.getMMI();
- const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const ARM64InstrInfo *TII = TM.getInstrInfo();
- DebugLoc DL = MBB.findDebugLoc(MBBI);
-
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- if (CSI.empty())
- return;
-
- const DataLayout *TD = MF.getTarget().getDataLayout();
- bool HasFP = hasFP(MF);
-
- // Calculate amount of bytes used for return address storing.
- int stackGrowth = -TD->getPointerSize(0);
-
- // Calculate offsets.
- int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth;
- unsigned TotalSkipped = 0;
- for (const auto &Info : CSI) {
- unsigned Reg = Info.getReg();
- int64_t Offset = MFI->getObjectOffset(Info.getFrameIdx()) -
- getOffsetOfLocalArea() + saveAreaOffset;
-
- // Don't output a new CFI directive if we're re-saving the frame pointer or
- // link register. This happens when the PrologEpilogInserter has inserted an
- // extra "STP" of the frame pointer and link register -- the "emitPrologue"
- // method automatically generates the directives when frame pointers are
- // used. If we generate CFI directives for the extra "STP"s, the linker will
- // lose track of the correct values for the frame pointer and link register.
- if (HasFP && (FramePtr == Reg || Reg == ARM64::LR)) {
- TotalSkipped += stackGrowth;
- continue;
- }
-
- unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, DwarfReg, Offset - TotalSkipped));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
-}
-
-void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
- MachineBasicBlock::iterator MBBI = MBB.begin();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const Function *Fn = MF.getFunction();
- const ARM64RegisterInfo *RegInfo = TM.getRegisterInfo();
- const ARM64InstrInfo *TII = TM.getInstrInfo();
- MachineModuleInfo &MMI = MF.getMMI();
- ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
- bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
- bool HasFP = hasFP(MF);
- DebugLoc DL = MBB.findDebugLoc(MBBI);
-
- int NumBytes = (int)MFI->getStackSize();
- if (!AFI->hasStackFrame()) {
- assert(!HasFP && "unexpected function without stack frame but with FP");
-
- // All of the stack allocation is for locals.
- AFI->setLocalStackSize(NumBytes);
-
- // Label used to tie together the PROLOG_LABEL and the MachineMoves.
- MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
-
- // REDZONE: If the stack size is less than 128 bytes, we don't need
- // to actually allocate.
- if (NumBytes && !canUseRedZone(MF)) {
- emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
-
- // Encode the stack size of the leaf function.
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- } else if (NumBytes) {
- ++NumRedZoneFunctions;
- }
-
- return;
- }
-
- // Only set up FP if we actually need to.
- int FPOffset = 0;
- if (HasFP) {
- // First instruction must a) allocate the stack and b) have an immediate
- // that is a multiple of -2.
- assert((MBBI->getOpcode() == ARM64::STPXpre ||
- MBBI->getOpcode() == ARM64::STPDpre) &&
- MBBI->getOperand(2).getReg() == ARM64::SP &&
- MBBI->getOperand(3).getImm() < 0 &&
- (MBBI->getOperand(3).getImm() & 1) == 0);
-
- // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
- // required for the callee saved register area we get the frame pointer
- // by addding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
- FPOffset = -(MBBI->getOperand(3).getImm() + 2) * 8;
- assert(FPOffset >= 0 && "Bad Framepointer Offset");
- }
-
- // Move past the saves of the callee-saved registers.
- while (MBBI->getOpcode() == ARM64::STPXi ||
- MBBI->getOpcode() == ARM64::STPDi ||
- MBBI->getOpcode() == ARM64::STPXpre ||
- MBBI->getOpcode() == ARM64::STPDpre) {
- ++MBBI;
- NumBytes -= 16;
- }
- assert(NumBytes >= 0 && "Negative stack allocation size!?");
- if (HasFP) {
- // Issue sub fp, sp, FPOffset or
- // mov fp,sp when FPOffset is zero.
- // Note: All stores of callee-saved registers are marked as "FrameSetup".
- // This code marks the instruction(s) that set the FP also.
- emitFrameOffset(MBB, MBBI, DL, ARM64::FP, ARM64::SP, FPOffset, TII,
- MachineInstr::FrameSetup);
- }
-
- // All of the remaining stack allocations are for locals.
- AFI->setLocalStackSize(NumBytes);
-
- // Allocate space for the rest of the frame.
- if (NumBytes) {
- // If we're a leaf function, try using the red zone.
- if (!canUseRedZone(MF))
- emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
- }
-
- // If we need a base pointer, set it up here. It's whatever the value of the
- // stack pointer is at this point. Any variable size objects will be allocated
- // after this, so we can still use the base pointer to reference locals.
- //
- // FIXME: Clarify FrameSetup flags here.
- // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
- // needed.
- //
- if (RegInfo->hasBasePointer(MF))
- TII->copyPhysReg(MBB, MBBI, DL, ARM64::X19, ARM64::SP, false);
-
- if (needsFrameMoves) {
- const DataLayout *TD = MF.getTarget().getDataLayout();
- const int StackGrowth = -TD->getPointerSize(0);
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
-
- // An example of the prologue:
- //
- // .globl __foo
- // .align 2
- // __foo:
- // Ltmp0:
- // .cfi_startproc
- // .cfi_personality 155, ___gxx_personality_v0
- // Leh_func_begin:
- // .cfi_lsda 16, Lexception33
- //
- // stp xa,bx, [sp, -#offset]!
- // ...
- // stp x28, x27, [sp, #offset-32]
- // stp fp, lr, [sp, #offset-16]
- // add fp, sp, #offset - 16
- // sub sp, sp, #1360
- //
- // The Stack:
- // +-------------------------------------------+
- // 10000 | ........ | ........ | ........ | ........ |
- // 10004 | ........ | ........ | ........ | ........ |
- // +-------------------------------------------+
- // 10008 | ........ | ........ | ........ | ........ |
- // 1000c | ........ | ........ | ........ | ........ |
- // +===========================================+
- // 10010 | X28 Register |
- // 10014 | X28 Register |
- // +-------------------------------------------+
- // 10018 | X27 Register |
- // 1001c | X27 Register |
- // +===========================================+
- // 10020 | Frame Pointer |
- // 10024 | Frame Pointer |
- // +-------------------------------------------+
- // 10028 | Link Register |
- // 1002c | Link Register |
- // +===========================================+
- // 10030 | ........ | ........ | ........ | ........ |
- // 10034 | ........ | ........ | ........ | ........ |
- // +-------------------------------------------+
- // 10038 | ........ | ........ | ........ | ........ |
- // 1003c | ........ | ........ | ........ | ........ |
- // +-------------------------------------------+
- //
- // [sp] = 10030 :: >>initial value<<
- // sp = 10020 :: stp fp, lr, [sp, #-16]!
- // fp = sp == 10020 :: mov fp, sp
- // [sp] == 10020 :: stp x28, x27, [sp, #-16]!
- // sp == 10010 :: >>final value<<
- //
- // The frame pointer (w29) points to address 10020. If we use an offset of
- // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
- // for w27, and -32 for w28:
- //
- // Ltmp1:
- // .cfi_def_cfa w29, 16
- // Ltmp2:
- // .cfi_offset w30, -8
- // Ltmp3:
- // .cfi_offset w29, -16
- // Ltmp4:
- // .cfi_offset w27, -24
- // Ltmp5:
- // .cfi_offset w28, -32
-
- if (HasFP) {
- // Define the current CFA rule to use the provided FP.
- unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- // Record the location of the stored LR
- unsigned LR = RegInfo->getDwarfRegNum(ARM64::LR, true);
- CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- // Record the location of the stored FP
- CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- } else {
- // Encode the stack size of the leaf function.
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
-
- // Now emit the moves for whatever callee saved regs we have.
- emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
- }
-}
-
-static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
- for (unsigned i = 0; CSRegs[i]; ++i)
- if (Reg == CSRegs[i])
- return true;
- return false;
-}
-
-static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) {
- if (MI->getOpcode() == ARM64::LDPXpost ||
- MI->getOpcode() == ARM64::LDPDpost || MI->getOpcode() == ARM64::LDPXi ||
- MI->getOpcode() == ARM64::LDPDi) {
- if (!isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) ||
- !isCalleeSavedRegister(MI->getOperand(1).getReg(), CSRegs) ||
- MI->getOperand(2).getReg() != ARM64::SP)
- return false;
- return true;
- }
-
- return false;
-}
-
-void ARM64FrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const ARM64InstrInfo *TII =
- static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
- const ARM64RegisterInfo *RegInfo =
- static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
- DebugLoc DL = MBBI->getDebugLoc();
-
- int NumBytes = MFI->getStackSize();
- unsigned NumRestores = 0;
- // Move past the restores of the callee-saved registers.
- MachineBasicBlock::iterator LastPopI = MBBI;
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
- if (LastPopI != MBB.begin()) {
- do {
- ++NumRestores;
- --LastPopI;
- } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs));
- if (!isCSRestore(LastPopI, CSRegs)) {
- ++LastPopI;
- --NumRestores;
- }
- }
- NumBytes -= NumRestores * 16;
- assert(NumBytes >= 0 && "Negative stack allocation size!?");
-
- if (!hasFP(MF)) {
- // If this was a redzone leaf function, we don't need to restore the
- // stack pointer.
- if (!canUseRedZone(MF))
- emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::SP, NumBytes, TII);
- return;
- }
-
- // Restore the original stack pointer.
- // FIXME: Rather than doing the math here, we should instead just use
- // non-post-indexed loads for the restores if we aren't actually going to
- // be able to save any instructions.
- if (NumBytes || MFI->hasVarSizedObjects())
- emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::FP,
- -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags);
-}
-
-/// getFrameIndexOffset - Returns the displacement from the frame register to
-/// the stack frame of the specified index.
-int ARM64FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- unsigned FrameReg;
- return getFrameIndexReference(MF, FI, FrameReg);
-}
-
-/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
-/// debug info. It's the same as what we use for resolving the code-gen
-/// references for now. FIXME: This can go wrong when references are
-/// SP-relative and simple call frames aren't used.
-int ARM64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI,
- unsigned &FrameReg) const {
- return resolveFrameIndexReference(MF, FI, FrameReg);
-}
-
-int ARM64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
- int FI, unsigned &FrameReg,
- bool PreferFP) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const ARM64RegisterInfo *RegInfo =
- static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
- const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
- int FPOffset = MFI->getObjectOffset(FI) + 16;
- int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
- bool isFixed = MFI->isFixedObjectIndex(FI);
-
- // Use frame pointer to reference fixed objects. Use it for locals if
- // there are VLAs (and thus the SP isn't reliable as a base).
- // Make sure useFPForScavengingIndex() does the right thing for the emergency
- // spill slot.
- bool UseFP = false;
- if (AFI->hasStackFrame()) {
- // Note: Keeping the following as multiple 'if' statements rather than
- // merging to a single expression for readability.
- //
- // Argument access should always use the FP.
- if (isFixed) {
- UseFP = hasFP(MF);
- } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
- // Use SP or FP, whichever gives us the best chance of the offset
- // being in range for direct access. If the FPOffset is positive,
- // that'll always be best, as the SP will be even further away.
- // If the FPOffset is negative, we have to keep in mind that the
- // available offset range for negative offsets is smaller than for
- // positive ones. If we have variable sized objects, we're stuck with
- // using the FP regardless, though, as the SP offset is unknown
- // and we don't have a base pointer available. If an offset is
- // available via the FP and the SP, use whichever is closest.
- if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
- (FPOffset >= -256 && Offset > -FPOffset))
- UseFP = true;
- }
- }
-
- if (UseFP) {
- FrameReg = RegInfo->getFrameRegister(MF);
- return FPOffset;
- }
-
- // Use the base pointer if we have one.
- if (RegInfo->hasBasePointer(MF))
- FrameReg = RegInfo->getBaseRegister();
- else {
- FrameReg = ARM64::SP;
- // If we're using the red zone for this function, the SP won't actually
- // be adjusted, so the offsets will be negative. They're also all
- // within range of the signed 9-bit immediate instructions.
- if (canUseRedZone(MF))
- Offset -= AFI->getLocalStackSize();
- }
-
- return Offset;
-}
-
-static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
- if (Reg != ARM64::LR)
- return getKillRegState(true);
-
- // LR maybe referred to later by an @llvm.returnaddress intrinsic.
- bool LRLiveIn = MF.getRegInfo().isLiveIn(ARM64::LR);
- bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken());
- return getKillRegState(LRKill);
-}
-
-bool ARM64FrameLowering::spillCalleeSavedRegisters(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- unsigned Count = CSI.size();
- DebugLoc DL;
- assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
-
- if (MI != MBB.end())
- DL = MI->getDebugLoc();
-
- for (unsigned i = 0; i < Count; i += 2) {
- unsigned idx = Count - i - 2;
- unsigned Reg1 = CSI[idx].getReg();
- unsigned Reg2 = CSI[idx + 1].getReg();
- // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
- // list to come in sorted by frame index so that we can issue the store
- // pair instructions directly. Assert if we see anything otherwise.
- //
- // The order of the registers in the list is controlled by
- // getCalleeSavedRegs(), so they will always be in-order, as well.
- assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
- "Out of order callee saved regs!");
- unsigned StrOpc;
- assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
- assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
- // Issue sequence of non-sp increment and pi sp spills for cs regs. The
- // first spill is a pre-increment that allocates the stack.
- // For example:
- // stp x22, x21, [sp, #-48]! // addImm(-6)
- // stp x20, x19, [sp, #16] // addImm(+2)
- // stp fp, lr, [sp, #32] // addImm(+4)
- // Rationale: This sequence saves uop updates compared to a sequence of
- // pre-increment spills like stp xi,xj,[sp,#-16]!
- // Note: Similar rational and sequence for restores in epilog.
- if (ARM64::GPR64RegClass.contains(Reg1)) {
- assert(ARM64::GPR64RegClass.contains(Reg2) &&
- "Expected GPR64 callee-saved register pair!");
- // For first spill use pre-increment store.
- if (i == 0)
- StrOpc = ARM64::STPXpre;
- else
- StrOpc = ARM64::STPXi;
- } else if (ARM64::FPR64RegClass.contains(Reg1)) {
- assert(ARM64::FPR64RegClass.contains(Reg2) &&
- "Expected FPR64 callee-saved register pair!");
- // For first spill use pre-increment store.
- if (i == 0)
- StrOpc = ARM64::STPDpre;
- else
- StrOpc = ARM64::STPDi;
- } else
- llvm_unreachable("Unexpected callee saved register!");
- DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
- << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx()
- << ", " << CSI[idx + 1].getFrameIdx() << ")\n");
- // Compute offset: i = 0 => offset = -Count;
- // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc.
- const int Offset = (i == 0) ? -Count : i;
- assert((Offset >= -64 && Offset <= 63) &&
- "Offset out of bounds for STP immediate");
- BuildMI(MBB, MI, DL, TII.get(StrOpc))
- .addReg(Reg2, getPrologueDeath(MF, Reg2))
- .addReg(Reg1, getPrologueDeath(MF, Reg1))
- .addReg(ARM64::SP)
- .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
- .setMIFlag(MachineInstr::FrameSetup);
- }
- return true;
-}
-
-bool ARM64FrameLowering::restoreCalleeSavedRegisters(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- unsigned Count = CSI.size();
- DebugLoc DL;
- assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
-
- if (MI != MBB.end())
- DL = MI->getDebugLoc();
-
- for (unsigned i = 0; i < Count; i += 2) {
- unsigned Reg1 = CSI[i].getReg();
- unsigned Reg2 = CSI[i + 1].getReg();
- // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
- // list to come in sorted by frame index so that we can issue the store
- // pair instructions directly. Assert if we see anything otherwise.
- assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() &&
- "Out of order callee saved regs!");
- // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only
- // the last load is sp-pi post-increment and de-allocates the stack:
- // For example:
- // ldp fp, lr, [sp, #32] // addImm(+4)
- // ldp x20, x19, [sp, #16] // addImm(+2)
- // ldp x22, x21, [sp], #48 // addImm(+6)
- // Note: see comment in spillCalleeSavedRegisters()
- unsigned LdrOpc;
-
- assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
- assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
- if (ARM64::GPR64RegClass.contains(Reg1)) {
- assert(ARM64::GPR64RegClass.contains(Reg2) &&
- "Expected GPR64 callee-saved register pair!");
- if (i == Count - 2)
- LdrOpc = ARM64::LDPXpost;
- else
- LdrOpc = ARM64::LDPXi;
- } else if (ARM64::FPR64RegClass.contains(Reg1)) {
- assert(ARM64::FPR64RegClass.contains(Reg2) &&
- "Expected FPR64 callee-saved register pair!");
- if (i == Count - 2)
- LdrOpc = ARM64::LDPDpost;
- else
- LdrOpc = ARM64::LDPDi;
- } else
- llvm_unreachable("Unexpected callee saved register!");
- DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
- << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx()
- << ", " << CSI[i + 1].getFrameIdx() << ")\n");
-
- // Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4;
- // etc.
- const int Offset = (i == Count - 2) ? Count : Count - i - 2;
- assert((Offset >= -64 && Offset <= 63) &&
- "Offset out of bounds for LDP immediate");
- BuildMI(MBB, MI, DL, TII.get(LdrOpc))
- .addReg(Reg2, getDefRegState(true))
- .addReg(Reg1, getDefRegState(true))
- .addReg(ARM64::SP)
- .addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8]
- // where the factor * 8 is implicit
- }
- return true;
-}
-
-void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan(
- MachineFunction &MF, RegScavenger *RS) const {
- const ARM64RegisterInfo *RegInfo =
- static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
- ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
- MachineRegisterInfo *MRI = &MF.getRegInfo();
- SmallVector<unsigned, 4> UnspilledCSGPRs;
- SmallVector<unsigned, 4> UnspilledCSFPRs;
-
- // The frame record needs to be created by saving the appropriate registers
- if (hasFP(MF)) {
- MRI->setPhysRegUsed(ARM64::FP);
- MRI->setPhysRegUsed(ARM64::LR);
- }
-
- // Spill the BasePtr if it's used. Do this first thing so that the
- // getCalleeSavedRegs() below will get the right answer.
- if (RegInfo->hasBasePointer(MF))
- MRI->setPhysRegUsed(RegInfo->getBaseRegister());
-
- // If any callee-saved registers are used, the frame cannot be eliminated.
- unsigned NumGPRSpilled = 0;
- unsigned NumFPRSpilled = 0;
- bool ExtraCSSpill = false;
- bool CanEliminateFrame = true;
- DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:");
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
-
- // Check pairs of consecutive callee-saved registers.
- for (unsigned i = 0; CSRegs[i]; i += 2) {
- assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
-
- const unsigned OddReg = CSRegs[i];
- const unsigned EvenReg = CSRegs[i + 1];
- assert((ARM64::GPR64RegClass.contains(OddReg) &&
- ARM64::GPR64RegClass.contains(EvenReg)) ^
- (ARM64::FPR64RegClass.contains(OddReg) &&
- ARM64::FPR64RegClass.contains(EvenReg)) &&
- "Register class mismatch!");
-
- const bool OddRegUsed = MRI->isPhysRegUsed(OddReg);
- const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg);
-
- // Early exit if none of the registers in the register pair is actually
- // used.
- if (!OddRegUsed && !EvenRegUsed) {
- if (ARM64::GPR64RegClass.contains(OddReg)) {
- UnspilledCSGPRs.push_back(OddReg);
- UnspilledCSGPRs.push_back(EvenReg);
- } else {
- UnspilledCSFPRs.push_back(OddReg);
- UnspilledCSFPRs.push_back(EvenReg);
- }
- continue;
- }
-
- unsigned Reg = ARM64::NoRegister;
- // If only one of the registers of the register pair is used, make sure to
- // mark the other one as used as well.
- if (OddRegUsed ^ EvenRegUsed) {
- // Find out which register is the additional spill.
- Reg = OddRegUsed ? EvenReg : OddReg;
- MRI->setPhysRegUsed(Reg);
- }
-
- DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
- DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
-
- assert(((OddReg == ARM64::LR && EvenReg == ARM64::FP) ||
- (RegInfo->getEncodingValue(OddReg) + 1 ==
- RegInfo->getEncodingValue(EvenReg))) &&
- "Register pair of non-adjacent registers!");
- if (ARM64::GPR64RegClass.contains(OddReg)) {
- NumGPRSpilled += 2;
- // If it's not a reserved register, we can use it in lieu of an
- // emergency spill slot for the register scavenger.
- // FIXME: It would be better to instead keep looking and choose another
- // unspilled register that isn't reserved, if there is one.
- if (Reg != ARM64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
- ExtraCSSpill = true;
- } else
- NumFPRSpilled += 2;
-
- CanEliminateFrame = false;
- }
-
- // FIXME: Set BigStack if any stack slot references may be out of range.
- // For now, just conservatively guestimate based on unscaled indexing
- // range. We'll end up allocating an unnecessary spill slot a lot, but
- // realistically that's not a big deal at this stage of the game.
- // The CSR spill slots have not been allocated yet, so estimateStackSize
- // won't include them.
- MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
- DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
- bool BigStack = (CFSize >= 256);
- if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
- AFI->setHasStackFrame(true);
-
- // Estimate if we might need to scavenge a register at some point in order
- // to materialize a stack offset. If so, either spill one additional
- // callee-saved register or reserve a special spill slot to facilitate
- // register scavenging. If we already spilled an extra callee-saved register
- // above to keep the number of spills even, we don't need to do anything else
- // here.
- if (BigStack && !ExtraCSSpill) {
-
- // If we're adding a register to spill here, we have to add two of them
- // to keep the number of regs to spill even.
- assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
- unsigned Count = 0;
- while (!UnspilledCSGPRs.empty() && Count < 2) {
- unsigned Reg = UnspilledCSGPRs.back();
- UnspilledCSGPRs.pop_back();
- DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
- << " to get a scratch register.\n");
- MRI->setPhysRegUsed(Reg);
- ExtraCSSpill = true;
- ++Count;
- }
-
- // If we didn't find an extra callee-saved register to spill, create
- // an emergency spill slot.
- if (!ExtraCSSpill) {
- const TargetRegisterClass *RC = &ARM64::GPR64RegClass;
- int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
- RS->addScavengingFrameIndex(FI);
- DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
- << " as the emergency spill slot.\n");
- }
- }
-}