From d7d003c2b7b7f657eed364e4ac06f4ab32fc8c2d Mon Sep 17 00:00:00 2001
From: Manman Ren
Date: Thu, 2 Aug 2012 00:56:42 +0000
Subject: X86 Peephole: fold loads to the source register operand if possible.

Machine CSE and other optimizations can remove instructions so folding
is possible at peephole while not possible at ISel.

This patch is a rework of r160919 and was tested on clang self-host on my
local machine.

rdar://10554090 and rdar://11873276

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161152 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/PeepholeOptimizer.cpp | 57 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 91c33c4af4..d9474bf240 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -78,6 +78,7 @@ STATISTIC(NumReuse, "Number of extension results reused");
 STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
 STATISTIC(NumCmps, "Number of compares eliminated");
 STATISTIC(NumImmFold, "Number of move immediate folded");
+STATISTIC(NumLoadFold, "Number of loads folded");
 
 namespace {
   class PeepholeOptimizer : public MachineFunctionPass {
@@ -114,6 +115,7 @@
     bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
                        SmallSet<unsigned, 4> &ImmDefRegs,
                        DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+    bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
   };
 }
 
@@ -384,6 +386,29 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
   return false;
 }
 
+/// isLoadFoldable - Check whether MI is a candidate for folding into a later
+/// instruction. We only fold loads to virtual registers and the virtual
+/// register defined has a single use.
+bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
+                                       unsigned &FoldAsLoadDefReg) {
+  if (MI->canFoldAsLoad()) {
+    const MCInstrDesc &MCID = MI->getDesc();
+    if (MCID.getNumDefs() == 1) {
+      unsigned Reg = MI->getOperand(0).getReg();
+      // To reduce compilation time, we check MRI->hasOneUse when inserting
+      // loads. It should be checked when processing uses of the load, since
+      // uses can be removed during peephole.
+      if (!MI->getOperand(0).getSubReg() &&
+          TargetRegisterInfo::isVirtualRegister(Reg) &&
+          MRI->hasOneUse(Reg)) {
+        FoldAsLoadDefReg = Reg;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
                                         SmallSet<unsigned, 4> &ImmDefRegs,
                                         DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
@@ -441,6 +466,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
   SmallPtrSet<MachineInstr*, 8> LocalMIs;
   SmallSet<unsigned, 4> ImmDefRegs;
   DenseMap<unsigned, MachineInstr*> ImmDefMIs;
+  unsigned FoldAsLoadDefReg;
 
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
     MachineBasicBlock *MBB = &*I;
@@ -448,6 +474,7 @@
     LocalMIs.clear();
     ImmDefRegs.clear();
     ImmDefMIs.clear();
+    FoldAsLoadDefReg = 0;
 
     bool First = true;
     MachineBasicBlock::iterator PMII;
@@ -456,12 +483,17 @@
       MachineInstr *MI = &*MII;
       LocalMIs.insert(MI);
 
+      // If there exists an instruction which belongs to the following
+      // categories, we will discard the load candidate.
       if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
           MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
           MI->hasUnmodeledSideEffects()) {
+        FoldAsLoadDefReg = 0;
         ++MII;
         continue;
       }
+      if (MI->mayStore() || MI->isCall())
+        FoldAsLoadDefReg = 0;
 
       if (MI->isBitcast()) {
         if (optimizeBitcastInstr(MI, MBB)) {
@@ -489,6 +521,31 @@
           Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
       }
 
+      // Check whether MI is a load candidate for folding into a later
+      // instruction. If MI is not a candidate, check whether we can fold an
+      // earlier load into MI.
+      if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
+        // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr
+        // can enable folding by converting SUB to CMP.
+        MachineInstr *DefMI = 0;
+        MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
+                                                      FoldAsLoadDefReg, DefMI);
+        if (FoldMI) {
+          // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
+          LocalMIs.erase(MI);
+          LocalMIs.erase(DefMI);
+          LocalMIs.insert(FoldMI);
+          MI->eraseFromParent();
+          DefMI->eraseFromParent();
+          ++NumLoadFold;
+
+          // MI is replaced with FoldMI.
+          Changed = true;
+          PMII = FoldMI;
+          MII = llvm::next(PMII);
+          continue;
+        }
+      }
       First = false;
       PMII = MII;
       ++MII;
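
Note on the target hook: the actual rewrite is performed by
TII->optimizeLoadInstr(MI, MRI, FoldAsLoadDefReg, DefMI), which is implemented
in the X86 backend rather than in PeepholeOptimizer.cpp. The sketch below is
illustrative only: the helper name foldLoadIntoUse is invented, and the logic
is deliberately simplified compared with the real X86 override (which also
tries to commute MI, verifies that the load is safe to move, and skips
instructions that themselves load). It shows the shape of the work such a hook
has to do against the TargetInstrInfo/MachineRegisterInfo APIs of this era.

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

// Illustrative only: roughly what a target's optimizeLoadInstr() override has
// to do before handing the rewrite to the generic folding machinery.  The
// function name is invented; it is not part of this patch.
static MachineInstr *foldLoadIntoUse(const TargetInstrInfo &TII,
                                     MachineInstr *MI,
                                     const MachineRegisterInfo *MRI,
                                     unsigned &FoldAsLoadDefReg,
                                     MachineInstr *&DefMI) {
  // No pending load candidate recorded by isLoadFoldable().
  if (FoldAsLoadDefReg == 0)
    return 0;

  // Re-check the single-use property: it held when the candidate was
  // recorded, but intervening peephole transforms may have changed the use
  // list (this is the check deferred in isLoadFoldable() above).
  if (!MRI->hasOneNonDBGUse(FoldAsLoadDefReg))
    return 0;
  DefMI = MRI->getVRegDef(FoldAsLoadDefReg);

  // Find the unique operand of MI that reads the loaded register; give up if
  // the register is also defined here or is read more than once.
  unsigned SrcOperandId = 0;
  bool FoundSrc = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.getReg() != FoldAsLoadDefReg)
      continue;
    if (MO.isDef() || FoundSrc)
      return 0;
    SrcOperandId = i;
    FoundSrc = true;
  }
  if (!FoundSrc)
    return 0;

  // Ask the generic folding logic to build and insert the memory-form
  // instruction; it returns the new MachineInstr or 0 if folding failed.
  SmallVector<unsigned, 1> Ops;
  Ops.push_back(SrcOperandId);
  MachineInstr *FoldMI = TII.foldMemoryOperand(MI, Ops, DefMI);
  if (!FoldMI)
    return 0;

  // The candidate is consumed; the caller erases both MI and DefMI.
  FoldAsLoadDefReg = 0;
  return FoldMI;
}

Resetting FoldAsLoadDefReg to 0 on success matches the contract the peephole
loop above relies on: a recorded load candidate is either consumed by a
successful fold or invalidated when a store, call, or other barrier
instruction is encountered.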