aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/Vectorize/VecUtils.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Vectorize/VecUtils.cpp')
-rw-r--r--lib/Transforms/Vectorize/VecUtils.cpp226
1 files changed, 174 insertions, 52 deletions
diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp
index 9b9436683b..21e6cdde30 100644
--- a/lib/Transforms/Vectorize/VecUtils.cpp
+++ b/lib/Transforms/Vectorize/VecUtils.cpp
@@ -46,7 +46,7 @@ namespace llvm {
BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
- BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
+ Builder(S->getContext()), BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
numberInstructions();
}
@@ -121,6 +121,7 @@ bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
if (Cost < CostThreshold) {
DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
+ Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Operands,VF)));
vectorizeTree(Operands, VF);
i += VF - 1;
Changed = true;
@@ -131,7 +132,7 @@ bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
}
bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
- ValueSet Heads, Tails;
+ SetVector<Value*> Heads, Tails;
SmallDenseMap<Value*, Value*> ConsecutiveChain;
// We may run into multiple chains that merge into a single chain. We mark the
@@ -152,7 +153,8 @@ bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
}
// For stores that start but don't end a link in the chain:
- for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
+ for (SetVector<Value*>::iterator it = Heads.begin(), e = Heads.end();
+ it != e; ++it) {
if (Tails.count(*it)) continue;
// We found a store instr that starts a chain. Now follow the chain and try
@@ -224,9 +226,14 @@ Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
}
void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
+ int LastIdx = getLastIndex(Operands, Operands.size());
+ Instruction *Loc = getInsertionPoint(LastIdx);
+ Builder.SetInsertPoint(Loc);
+
+ assert(getFirstUserIndex(Operands, Operands.size()) > LastIdx &&
+ "Vectorizing with in-tree users");
+
Value *Vec = vectorizeTree(Operands, Operands.size());
- BasicBlock::iterator Loc = cast<Instruction>(Vec);
- IRBuilder<> Builder(++Loc);
// After vectorizing the operands we need to generate extractelement
// instructions and replace all of the uses of the scalar values with
// the values that we extracted from the vectorized tree.
@@ -243,6 +250,16 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
LaneMap.clear();
MultiUserVals.clear();
MustScalarize.clear();
+ MustExtract.clear();
+
+ // Find the location of the last root.
+ int LastRootIndex = getLastIndex(VL, VL.size());
+ int FirstUserIndex = getFirstUserIndex(VL, VL.size());
+
+ // Don't vectorize if there are users of the tree roots inside the tree
+ // itself.
+ if (LastRootIndex > FirstUserIndex)
+ return max_cost;
// Scan the tree and find which value is used by which lane, and which values
// must be scalarized.
@@ -250,7 +267,7 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
// Check that instructions with multiple users can be vectorized. Mark unsafe
// instructions.
- for (ValueSet::iterator it = MultiUserVals.begin(),
+ for (SetVector<Value*>::iterator it = MultiUserVals.begin(),
e = MultiUserVals.end(); it != e; ++it) {
// Check that all of the users of this instr are within the tree
// and that they are all from the same lane.
@@ -258,15 +275,35 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
I != E; ++I) {
if (LaneMap.find(*I) == LaneMap.end()) {
- MustScalarize.insert(*it);
- DEBUG(dbgs()<<"SLP: Adding " << **it <<
- " to MustScalarize because of an out of tree usage.\n");
- break;
+ DEBUG(dbgs()<<"SLP: Instr " << **it << " has multiple users.\n");
+
+ // We don't have an ordering problem if the user is not in this basic
+ // block.
+ Instruction *Inst = cast<Instruction>(*I);
+ if (Inst->getParent() != BB) {
+ MustExtract.insert(*it);
+ continue;
+ }
+
+ // We don't have an ordering problem if the user is after the last root.
+ int Idx = InstrIdx[Inst];
+ if (Idx < LastRootIndex) {
+ MustScalarize.insert(*it);
+ DEBUG(dbgs()<<"SLP: Adding to MustScalarize "
+ "because of an unsafe out of tree usage.\n");
+ break;
+ }
+
+
+ DEBUG(dbgs()<<"SLP: Adding to MustExtract "
+ "because of a safe out of tree usage.\n");
+ MustExtract.insert(*it);
+ continue;
}
if (Lane == -1) Lane = LaneMap[*I];
if (Lane != LaneMap[*I]) {
MustScalarize.insert(*it);
- DEBUG(dbgs()<<"Adding " << **it <<
+ DEBUG(dbgs()<<"SLP: Adding " << **it <<
" to MustScalarize because multiple lane use it: "
<< Lane << " and " << LaneMap[*I] << ".\n");
break;
@@ -311,14 +348,6 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (!I || Opcode != I->getOpcode()) return;
}
- // Mark instructions with multiple users.
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- // Remember to check if all of the users of this instr are vectorized
- // within our tree.
- if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
- }
-
for (int i = 0, e = VL.size(); i < e; ++i) {
// Check that the instruction is only used within
// one lane.
@@ -327,6 +356,19 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
LaneMap[VL[i]] = i;
}
+ // Mark instructions with multiple users.
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // Remember to check if all of the users of this instr are vectorized
+ // within our tree. At depth zero we have no local users, only external
+ // users that we don't care about.
+ if (Depth && I && I->getNumUses() > 1) {
+ DEBUG(dbgs()<<"SLP: Adding to MultiUserVals "
+ "because it has multiple users:" << *I << " \n");
+ MultiUserVals.insert(I);
+ }
+ }
+
switch (Opcode) {
case Instruction::ZExt:
case Instruction::SExt:
@@ -440,11 +482,9 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
// Check if it is safe to sink the loads or the stores.
if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
- int MaxIdx = InstrIdx[VL0];
- for (unsigned i = 1, e = VL.size(); i < e; ++i )
- MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
-
+ int MaxIdx = getLastIndex(VL, VL.size());
Instruction *Last = InstrVec[MaxIdx];
+
for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
if (VL[i] == Last) continue;
Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
@@ -456,6 +496,13 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
}
+ // Calculate the extract cost.
+ unsigned ExternalUserExtractCost = 0;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i)
+ if (MustExtract.count(VL[i]))
+ ExternalUserExtractCost +=
+ TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
+
switch (Opcode) {
case Instruction::ZExt:
case Instruction::SExt:
@@ -469,7 +516,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
- int Cost = 0;
+ int Cost = ExternalUserExtractCost;
ValueList Operands;
Type *SrcTy = VL0->getOperand(0)->getType();
// Prepare the operand vector.
@@ -510,7 +557,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- int Cost = 0;
+ int Cost = ExternalUserExtractCost;
// Calculate the cost of all of the operands.
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
@@ -540,7 +587,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
int ScalarLdCost = VecTy->getNumElements() *
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
- return VecLdCost - ScalarLdCost;
+ return VecLdCost - ScalarLdCost + ExternalUserExtractCost;
}
case Instruction::Store: {
// We know that we can merge the stores. Calculate the cost.
@@ -556,7 +603,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
- return TotalCost;
+ return TotalCost + ExternalUserExtractCost;
}
default:
// Unable to vectorize unknown instructions.
@@ -564,15 +611,40 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
}
-Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) {
+int BoUpSLP::getLastIndex(ArrayRef<Value *> VL, unsigned VF) {
int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
for (unsigned i = 0; i < VF; ++i )
MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
- return InstrVec[MaxIdx + 1];
+ return MaxIdx;
+}
+
+int BoUpSLP::getFirstUserIndex(ArrayRef<Value *> VL, unsigned VF) {
+ // Find the first user of the values.
+ int FirstUser = InstrVec.size();
+ for (unsigned i = 0; i < VF; ++i) {
+ for (Value::use_iterator U = VL[i]->use_begin(), UE = VL[i]->use_end();
+ U != UE; ++U) {
+ Instruction *Instr = dyn_cast<Instruction>(*U);
+ if (!Instr || Instr->getParent() != BB)
+ continue;
+
+ FirstUser = std::min(FirstUser, InstrIdx[Instr]);
+ }
+ }
+ return FirstUser;
+}
+
+int BoUpSLP::getLastIndex(Instruction *I, Instruction *J) {
+ assert(I->getParent() == BB && "Invalid parent for instruction I");
+ assert(J->getParent() == BB && "Invalid parent for instruction J");
+ return std::max(InstrIdx[I],InstrIdx[J]);
+}
+
+Instruction *BoUpSLP::getInsertionPoint(unsigned Index) {
+ return InstrVec[Index + 1];
}
Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
- IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
Value *Vec = UndefValue::get(Ty);
for (unsigned i=0; i < Ty->getNumElements(); ++i) {
// Generate the 'InsertElement' instruction.
@@ -583,15 +655,51 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
GatherInstructions.push_back(Vec);
}
+ for (unsigned i = 0; i < Ty->getNumElements(); ++i)
+ VectorizedValues[VL[i]] = Vec;
+
return Vec;
}
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
Value *V = vectorizeTree_rec(VL, VF);
+
+ int LastInstrIdx = getLastIndex(VL, VL.size());
+ for (SetVector<Value*>::iterator it = MustExtract.begin(),
+ e = MustExtract.end(); it != e; ++it) {
+ Instruction *I = cast<Instruction>(*it);
+
+ // This is a scalarized value, so we can use the original value.
+ // No need to extract from the vector.
+ if (!LaneMap.count(I))
+ continue;
+
+ Value *Vec = VectorizedValues[I];
+ // We decided not to vectorize I because one of its users was not
+ // vectorizerd. This is okay.
+ if (!Vec)
+ continue;
+
+ Value *Idx = Builder.getInt32(LaneMap[I]);
+ Value *Extract = Builder.CreateExtractElement(Vec, Idx);
+ bool Replaced = false;
+ for (Value::use_iterator U = I->use_begin(), UE = I->use_end(); U != UE;
+ ++U) {
+ Instruction *UI = cast<Instruction>(*U);
+ if (UI->getParent() != I->getParent() || InstrIdx[UI] > LastInstrIdx)
+ UI->replaceUsesOfWith(I ,Extract);
+ Replaced = true;
+ }
+ assert(Replaced && "Must replace at least one outside user");
+ (void)Replaced;
+ }
+
// We moved some instructions around. We have to number them again
// before we can do any analysis.
numberInstructions();
MustScalarize.clear();
+ MustExtract.clear();
+ VectorizedValues.clear();
return V;
}
@@ -614,19 +722,27 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
}
// Check that this is a simple vector constant.
- if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);
+ if (AllConst || AllSameScalar)
+ return Scalarize(VL, VecTy);
// Scalarize unknown structures.
Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
- if (!VL0) return Scalarize(VL, VecTy);
+ if (!VL0)
+ return Scalarize(VL, VecTy);
- if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];
+ if (VectorizedValues.count(VL0)) {
+ Value * Vec = VectorizedValues[VL0];
+ for (int i = 0; i < VF; ++i)
+ VectorizedValues[VL[i]] = Vec;
+ return Vec;
+ }
unsigned Opcode = VL0->getOpcode();
for (unsigned i = 0, e = VF; i < e; ++i) {
Instruction *I = dyn_cast<Instruction>(VL[i]);
// If not all of the instructions are identical then we have to scalarize.
- if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
+ if (!I || Opcode != I->getOpcode())
+ return Scalarize(VL, VecTy);
}
switch (Opcode) {
@@ -646,10 +762,12 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
for (int i = 0; i < VF; ++i)
INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
Value *InVec = vectorizeTree_rec(INVL, VF);
- IRBuilder<> Builder(GetLastInstr(VL, VF));
CastInst *CI = dyn_cast<CastInst>(VL0);
Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
- VectorizedValues[VL0] = V;
+
+ for (int i = 0; i < VF; ++i)
+ VectorizedValues[VL[i]] = V;
+
return V;
}
case Instruction::Add:
@@ -672,16 +790,18 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
case Instruction::Xor: {
ValueList LHSVL, RHSVL;
for (int i = 0; i < VF; ++i) {
- RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
- LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
+ LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
+ RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
}
- Value *RHS = vectorizeTree_rec(RHSVL, VF);
Value *LHS = vectorizeTree_rec(LHSVL, VF);
- IRBuilder<> Builder(GetLastInstr(VL, VF));
+ Value *RHS = vectorizeTree_rec(RHSVL, VF);
BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
- Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS);
- VectorizedValues[VL0] = V;
+ Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS,RHS);
+
+ for (int i = 0; i < VF; ++i)
+ VectorizedValues[VL[i]] = V;
+
return V;
}
case Instruction::Load: {
@@ -693,12 +813,18 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
if (!isConsecutiveAccess(VL[i-1], VL[i]))
return Scalarize(VL, VecTy);
- IRBuilder<> Builder(GetLastInstr(VL, VF));
- Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
- VecTy->getPointerTo());
- LI = Builder.CreateLoad(VecPtr);
+ // Loads are inserted at the head of the tree because we don't want to sink
+ // them all the way down past store instructions.
+ Instruction *Loc = getInsertionPoint(getLastIndex(VL, VL.size()));
+ IRBuilder<> LoadBuilder(Loc);
+ Value *VecPtr = LoadBuilder.CreateBitCast(LI->getPointerOperand(),
+ VecTy->getPointerTo());
+ LI = LoadBuilder.CreateLoad(VecPtr);
LI->setAlignment(Alignment);
- VectorizedValues[VL0] = LI;
+
+ for (int i = 0; i < VF; ++i)
+ VectorizedValues[VL[i]] = LI;
+
return LI;
}
case Instruction::Store: {
@@ -710,8 +836,6 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
Value *VecValue = vectorizeTree_rec(ValueOp, VF);
-
- IRBuilder<> Builder(GetLastInstr(VL, VF));
Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
VecTy->getPointerTo());
Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
@@ -721,9 +845,7 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
return 0;
}
default:
- Value *S = Scalarize(VL, VecTy);
- VectorizedValues[VL0] = S;
- return S;
+ return Scalarize(VL, VecTy);
}
}