diff options
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 82 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/unal-altivec.ll | 25 |
2 files changed, 92 insertions, 15 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index f47376cc51..87b63a8f0f 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6781,6 +6781,75 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, return SDValue(); } +// Return true if there is a nearby consecutive load to the one provided +// (regardless of alignment). We search up and down the chain, looking through +// token factors and other loads (but nothing else). As a result, a true +// result indicates that it is safe to create a new consecutive load adjacent +// to the load provided. +static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { + SDValue Chain = LD->getChain(); + EVT VT = LD->getMemoryVT(); + + SmallSet<SDNode *, 16> LoadRoots; + SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); + SmallSet<SDNode *, 16> Visited; + + // First, search up the chain, branching to follow all token-factor operands. + // If we find a consecutive load, then we're done, otherwise, record all + // nodes just above the top-level loads and token factors. + while (!Queue.empty()) { + SDNode *ChainNext = Queue.pop_back_val(); + if (!Visited.insert(ChainNext)) + continue; + + if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) { + if (DAG.isConsecutiveLoad(ChainLD, LD, VT.getStoreSize(), 1)) + return true; + + if (!Visited.count(ChainLD->getChain().getNode())) + Queue.push_back(ChainLD->getChain().getNode()); + } else if (ChainNext->getOpcode() == ISD::TokenFactor) { + for (SDNode::op_iterator O = ChainNext->op_begin(), + OE = ChainNext->op_end(); O != OE; ++O) + if (!Visited.count(O->getNode())) + Queue.push_back(O->getNode()); + } else + LoadRoots.insert(ChainNext); + } + + // Second, search down the chain, starting from the top-level nodes recorded + // in the first phase. These top-level nodes are the nodes just above all + // loads and token factors. 
Starting with their uses, recursively look through + // all loads (just the chain uses) and token factors to find a consecutive + // load. + Visited.clear(); + Queue.clear(); + + for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), + IE = LoadRoots.end(); I != IE; ++I) { + Queue.push_back(*I); + + while (!Queue.empty()) { + SDNode *LoadRoot = Queue.pop_back_val(); + if (!Visited.insert(LoadRoot)) + continue; + + if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot)) + if (DAG.isConsecutiveLoad(ChainLD, LD, VT.getStoreSize(), 1)) + return true; + + for (SDNode::use_iterator UI = LoadRoot->use_begin(), + UE = LoadRoot->use_end(); UI != UE; ++UI) + if (((isa<LoadSDNode>(*UI) && + cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) || + UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) + Queue.push_back(*UI); + } + } + + return false; +} + SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { const TargetMachine &TM = getTargetMachine(); @@ -7015,12 +7084,19 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // require the next load to appear to be aligned, even though it // is actually offset from the base pointer by a lesser amount. int IncOffset = VT.getSizeInBits() / 8; - int IncValue = IncOffset - 1; + int IncValue = IncOffset; + + // Walk (both up and down) the chain looking for another load at the real + // (aligned) offset (the alignment of the other load does not matter in + // this case). If found, then do not use the offset reduction trick, as + // that will prevent the loads from being later combined (as they would + // otherwise be duplicates). + if (!findConsecutiveLoad(LD, DAG)) + --IncValue; + SDValue Increment = DAG.getConstant(IncValue, getPointerTy()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); - // FIXME: We might have another load (with a slightly-different - // real offset) that we can reuse here. 
SDValue ExtraLoad = DAG.getLoad(VT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncOffset), diff --git a/test/CodeGen/PowerPC/unal-altivec.ll b/test/CodeGen/PowerPC/unal-altivec.ll index dec93ebf76..7f333a1c50 100644 --- a/test/CodeGen/PowerPC/unal-altivec.ll +++ b/test/CodeGen/PowerPC/unal-altivec.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -mcpu=g5 | FileCheck %s -; RUN: llc < %s -mcpu=g5 | FileCheck %s -check-prefix=CHECK-PC target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -30,20 +29,22 @@ vector.body: ; preds = %vector.body, %vecto br i1 %10, label %for.end, label %vector.body ; CHECK: @foo -; CHECK: lvx [[CNST:[0-9]+]], -; CHECK-DAG: lvsl [[PC:[0-9]+]], [[B1:[0-9]+]], [[B2:[0-9]+]] -; CHECK-DAG: lvx [[LD1:[0-9]+]], [[B1]], [[B2]] -; CHECK-DAG: add [[B3:[0-9]+]], [[B1]], [[B2]] -; CHECK-DAG: lvx [[LD2:[0-9]+]], [[B3]], +; CHECK-DAG: li [[C0:[0-9]+]], 0 +; CHECK-DAG: li [[C16:[0-9]+]], 16 +; CHECK-DAG: li [[C31:[0-9]+]], 31 +; CHECK-DAG: lvx [[CNST:[0-9]+]], +; CHECK: .LBB0_1: +; CHECK-DAG: lvsl [[PC:[0-9]+]], [[B1:[0-9]+]], [[C0]] +; CHECK-DAG: lvx [[LD1:[0-9]+]], [[B1]], [[C0]] +; CHECK-DAG: add [[B3:[0-9]+]], [[B1]], [[C0]] +; CHECK-DAG: lvx [[LD2:[0-9]+]], [[B3]], [[C16]] +; CHECK-DAG: lvx [[LD3:[0-9]+]], [[B3]], [[C31]] ; CHECK-DAG: vperm [[R1:[0-9]+]], [[LD1]], [[LD2]], [[PC]] -; CHECK: vaddfp {{[0-9]+}}, [[R1]], [[CNST]] +; CHECK-DAG: vperm [[R2:[0-9]+]], [[LD2]], [[LD3]], [[PC]] +; CHECK-DAG: vaddfp {{[0-9]+}}, [[R1]], [[CNST]] +; CHECK-DAG: vaddfp {{[0-9]+}}, [[R2]], [[CNST]] ; CHECK: blr -; CHECK-PC: @foo -; CHECK-PC: lvsl -; CHECK-PC-NOT: lvsl -; CHECK-PC: blr - for.end: ; preds = %vector.body ret void } |