diff options
Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp')
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 405 |
1 files changed, 264 insertions, 141 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2b2713d248..4a6e5cf036 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -34,10 +34,10 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -214,6 +214,20 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, llvm_unreachable("Unknown mismatch!"); } +static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, + const Twine &ErrMsg) { + const Instruction *I = dyn_cast_or_null<Instruction>(V); + if (!V) + return Ctx.emitError(ErrMsg); + + const char *AsmError = ", possible invalid constraint for vector type"; + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (isa<InlineAsm>(CI->getCalledValue())) + return Ctx.emitError(I, ErrMsg + AsmError); + + return Ctx.emitError(I, ErrMsg); +} + /// getCopyFromPartsVector - Create a value that contains the specified legal /// parts combined into the value they represent. If the parts combine to a /// type larger then ValueVT then AssertOp can be used to specify whether the @@ -306,16 +320,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, // Handle cases such as i8 -> <1 x i1> if (ValueVT.getVectorNumElements() != 1) { - LLVMContext &Ctx = *DAG.getContext(); - Twine ErrMsg("non-trivial scalar-to-vector conversion"); - if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { - if (const CallInst *CI = dyn_cast<CallInst>(I)) - if (isa<InlineAsm>(CI->getCalledValue())) - ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; - Ctx.emitError(I, ErrMsg); - } else { - Ctx.emitError(ErrMsg); - } + diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, + "non-trivial scalar-to-vector conversion"); return DAG.getUNDEF(ValueVT); } @@ -397,18 +403,9 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, "Failed to tile the value with PartVT!"); if (NumParts == 1) { - if (PartEVT != ValueVT) { - LLVMContext &Ctx = *DAG.getContext(); - Twine ErrMsg("scalar-to-vector conversion failed"); - if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { - if (const CallInst *CI = dyn_cast<CallInst>(I)) - if (isa<InlineAsm>(CI->getCalledValue())) - ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; - Ctx.emitError(I, ErrMsg); - } else { - Ctx.emitError(ErrMsg); - } - } + if (PartEVT != ValueVT) + diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, + "scalar-to-vector conversion failed"); Parts[0] = Val; return; @@ -627,16 +624,6 @@ namespace { } } - /// areValueTypesLegal - Return true if types of all the values are legal. - bool areValueTypesLegal(const TargetLowering &TLI) { - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - MVT RegisterVT = RegVTs[Value]; - if (!TLI.isTypeLegal(RegisterVT)) - return false; - } - return true; - } - /// append - Add the specified values to this one. void append(const RegsForValue &RHS) { ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); @@ -851,12 +838,20 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); Ops.push_back(Res); + unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); MVT RegisterVT = RegVTs[Value]; for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); - Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); + unsigned TheReg = Regs[Reg++]; + Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); + + if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { + // If we clobbered the stack pointer, MFI should know about it. + assert(DAG.getMachineFunction().getFrameInfo()-> + hasInlineAsmWithSPAdjust()); + } } } } @@ -866,7 +861,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - TD = DAG.getTarget().getDataLayout(); + DL = DAG.getTarget().getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -884,6 +879,7 @@ void SelectionDAGBuilder::clear() { PendingExports.clear(); CurInst = NULL; HasTailCall = false; + SDNodeOrder = LowestSDNodeOrder; } /// clearDanglingDebugInfo - Clear the dangling debug information @@ -1384,7 +1380,9 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB) { + MachineBasicBlock *SwitchBB, + uint32_t TWeight, + uint32_t FWeight) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into @@ -1409,7 +1407,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, } CaseBlock CB(Condition, BOp->getOperand(0), - BOp->getOperand(1), NULL, TBB, FBB, CurBB); + BOp->getOperand(1), NULL, TBB, FBB, CurBB, TWeight, FWeight); SwitchCases.push_back(CB); return; } @@ -1417,17 +1415,26 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), - NULL, TBB, FBB, CurBB); + NULL, TBB, FBB, CurBB, TWeight, FWeight); SwitchCases.push_back(CB); } +/// Scale down both weights to fit into uint32_t. +static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { + uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; + uint32_t Scale = (NewMax / UINT32_MAX) + 1; + NewTrue = NewTrue / Scale; + NewFalse = NewFalse / Scale; +} + /// FindMergedConditions - If Cond is an expression like void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - unsigned Opc) { + unsigned Opc, uint32_t TWeight, + uint32_t FWeight) { // If this node is not part of the or/and tree, emit it as a branch. const Instruction *BOp = dyn_cast<Instruction>(Cond); if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || @@ -1435,7 +1442,8 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { - EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB); + EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, + TWeight, FWeight); return; } @@ -1447,6 +1455,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, if (Opc == Instruction::Or) { // Codegen X | Y as: + // BB1: // jmp_if_X TBB // jmp TmpBB // TmpBB: @@ -1454,14 +1463,34 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // jmp FBB // + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) + // = TrueProb for orignal BB. + // Assuming the orignal weights are A and B, one choice is to set BB1's + // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice + // assumes that + // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. + // Another choice is to assume TrueProb for BB1 equals to TrueProb for + // TmpBB, but the math is more complicated. + + uint64_t NewTrueWeight = TWeight; + uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); + NewTrueWeight = TWeight; + NewFalseWeight = 2 * (uint64_t)FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the RHS condition into TmpBB. - FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: + // BB1: // jmp_if_X TmpBB // jmp FBB // TmpBB: @@ -1470,11 +1499,28 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // // This requires creation of TmpBB after CurBB. + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) + // = FalseProb for orignal BB. + // Assuming the orignal weights are A and B, one choice is to set BB1's + // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice + // assumes that + // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. + + uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight; + uint64_t NewFalseWeight = FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); + NewTrueWeight = 2 * (uint64_t)TWeight; + NewFalseWeight = FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the RHS condition into TmpBB. - FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); } } @@ -1525,8 +1571,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Update machine-CFG edges. BrMBB->addSuccessor(Succ0MBB); - // If this is not a fall-through branch, emit the branch. - if (Succ0MBB != NextBlock) + // If this is not a fall-through branch or optimizations are switched off, + // emit the branch. + if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None) DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB))); @@ -1561,7 +1608,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, - BOp->getOpcode()); + BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), + getEdgeWeight(BrMBB, Succ1MBB)); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. @@ -2351,7 +2399,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, volatile double RDensity = (double)RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); - double Metric = Range.logBase2()*(LDensity+RDensity); + volatile double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place DEBUG(dbgs() <<"=>Step\n" << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' @@ -2590,7 +2638,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, if (Cases.size() >= 2) // Must recompute end() each iteration because it may be // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin()); + for (CaseItr I = Cases.begin(), J = std::next(Cases.begin()); J != Cases.end(); ) { const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); @@ -2936,6 +2984,13 @@ void SelectionDAGBuilder::visitBitCast(const User &I) { if (DestVT != N.getValueType()) setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), DestVT, N)); // convert types. + // Check if the original LLVM IR Operand was a ConstantInt, because getValue() + // might fold any kind of constant expression to an integer constant and that + // is not what we are looking for. Only regcognize a bitcast of a genuine + // constant integer as an opaque constant. + else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) + setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false, + /*isOpaque*/true)); else setValue(&I, N); // noop cast. } @@ -3261,7 +3316,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); if (Field) { // N = N + Offset - uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); + uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, DAG.getConstant(Offset, N.getValueType())); } @@ -3275,7 +3330,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; uint64_t Offs = - TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; EVT PTy = TLI->getPointerTy(AS); unsigned PtrBits = PTy.getSizeInBits(); @@ -3292,7 +3347,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // N = N + Idx * ElementSize; APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS), - TD->getTypeAllocSize(Ty)); + DL->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); // If the index is smaller or larger than intptr_t, truncate or extend @@ -3370,9 +3425,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); - // Inform the Frame Information that we have just allocated a variable-sized - // object. - FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1); + assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects()); } void SelectionDAGBuilder::visitLoad(const LoadInst &I) { @@ -3400,7 +3453,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Root; bool ConstantMemory = false; - if (I.isVolatile() || NumValues > MaxParallelChains) + if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); else if (AA->pointsToConstantMemory( @@ -3413,6 +3466,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = DAG.getRoot(); } + const TargetLowering *TLI = TM.getTargetLowering(); + if (isVolatile) + Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); + SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), NumValues)); @@ -3536,14 +3593,15 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); - AtomicOrdering Order = I.getOrdering(); + AtomicOrdering SuccessOrder = I.getSuccessOrdering(); + AtomicOrdering FailureOrder = I.getFailureOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); const TargetLowering *TLI = TM.getTargetLowering(); if (TLI->getInsertFencesForAtomic()) - InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, + InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl, DAG, *TLI); SDValue L = @@ -3554,13 +3612,14 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */, - TLI->getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder, + TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope); SDValue OutChain = L.getValue(1); if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, + OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl, DAG, *TLI); setValue(&I, L); @@ -3637,6 +3696,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); + InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, getValue(I.getPointerOperand()), @@ -5283,7 +5343,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; SmallVector<Value *, 4> Allocas; - GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); + GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL); for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), E = Allocas.end(); Object != E; ++Object) { @@ -5324,6 +5384,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { (void)getControlRoot(); return 0; } + case Intrinsic::clear_cache: + return TLI->getClearCacheBuiltinName(); case Intrinsic::donothing: // ignore return 0; @@ -5366,6 +5428,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, int DemoteStackIdx = -100; if (!CanLowerReturn) { + assert(!CS.hasInAllocaArgument() && + "sret demotion is incompatible with inalloca"); uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( FTy->getReturnType()); unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( @@ -5508,9 +5572,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the /// value is equal or not-equal to zero. static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + for (const User *U : V->users()) { + if (const ICmpInst *IC = dyn_cast<ICmpInst>(U)) if (IC->isEquality()) if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1))) if (C->isNullValue()) @@ -5534,7 +5597,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), - Builder.TD)) + Builder.DL)) return Builder.getValue(LoadCst); } @@ -5653,9 +5716,13 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // bloat the code. const TargetLowering *TLI = TM.getTargetLowering(); if (ActuallyDoIt && CSize->getZExtValue() > 4) { + unsigned DstAS = LHS->getType()->getPointerAddressSpace(); + unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. - if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT)) + if (!TLI->isTypeLegal(LoadVT) || + !TLI->allowsUnalignedMemoryAccesses(LoadVT, SrcAS) || + !TLI->allowsUnalignedMemoryAccesses(LoadVT, DstAS)) ActuallyDoIt = false; } @@ -6026,7 +6093,7 @@ public: /// MVT::Other. EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout *TD) const { + const DataLayout *DL) const { if (CallOperandVal == 0) return MVT::Other; if (isa<BasicBlock>(CallOperandVal)) @@ -6052,7 +6119,7 @@ public: // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { - unsigned BitSize = TD->getTypeSizeInBits(OpTy); + unsigned BitSize = DL->getTypeSizeInBits(OpTy); switch (BitSize) { default: break; case 1: @@ -6108,7 +6175,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, // types are identical size, use a bitcast to convert (e.g. two differing // vector types). MVT RegVT = *PhysReg.second->vt_begin(); - if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { + if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; @@ -6241,7 +6308,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, TD). + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, DL). getSimpleVT(); } @@ -6716,11 +6783,11 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { const TargetLowering *TLI = TM.getTargetLowering(); - const DataLayout &TD = *TLI->getDataLayout(); + const DataLayout &DL = *TLI->getDataLayout(); SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), - TD.getABITypeAlignment(I.getType())); + DL.getABITypeAlignment(I.getType())); setValue(&I, V); DAG.setRoot(V.getValue(1)); } @@ -6781,6 +6848,42 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, return TLI->LowerCallTo(CLI); } +/// \brief Add a stack map intrinsic call's live variable operands to a stackmap +/// or patchpoint target node's operand list. +/// +/// Constants are converted to TargetConstants purely as an optimization to +/// avoid constant materialization and register allocation. +/// +/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not +/// generate addess computation nodes, and so ExpandISelPseudo can convert the +/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids +/// address materialization and register allocation, but may also be required +/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an +/// alloca in the entry block, then the runtime may assume that the alloca's +/// StackMap location can be read immediately after compilation and that the +/// location is valid at any point during execution (this is similar to the +/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were +/// only available in a register, then the runtime would need to trap when +/// execution reaches the StackMap in order to read the alloca's location. +static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx, + SmallVectorImpl<SDValue> &Ops, + SelectionDAGBuilder &Builder) { + for (unsigned i = StartIdx, e = CI.getNumArgOperands(); i != e; ++i) { + SDValue OpVal = Builder.getValue(CI.getArgOperand(i)); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { + Ops.push_back( + Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Ops.push_back( + Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { + const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); + Ops.push_back( + Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); + } else + Ops.push_back(OpVal); + } +} + /// \brief Lower llvm.experimental.stackmap directly to its target opcode. void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>, @@ -6788,61 +6891,64 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); - SDValue Callee = getValue(CI.getCalledValue()); + SDValue Chain, InFlag, Callee, NullPtr; + SmallVector<SDValue, 32> Ops; - // Lower into a call sequence with no args and no return value. - std::pair<SDValue, SDValue> Result = LowerCallOperands(CI, 0, 0, Callee); - // Set the root to the target-lowered call chain. - SDValue Chain = Result.second; - DAG.setRoot(Chain); + SDLoc DL = getCurSDLoc(); + Callee = getValue(CI.getCalledValue()); + NullPtr = DAG.getIntPtrConstant(0, true); - /// Get a call instruction from the call sequence chain. - /// Tail calls are not allowed. - SDNode *CallEnd = Chain.getNode(); - assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && - "Expected a callseq node."); - SDNode *Call = CallEnd->getOperand(0).getNode(); - bool hasGlue = Call->getGluedNode(); + // The stackmap intrinsic only records the live variables (the arguemnts + // passed to it) and emits NOPS (if requested). Unlike the patchpoint + // intrinsic, this won't be lowered to a function call. This means we don't + // have to worry about calling conventions and target specific lowering code. + // Instead we perform the call lowering right here. + // + // chain, flag = CALLSEQ_START(chain, 0) + // chain, flag = STACKMAP(id, nbytes, ..., chain, flag) + // chain, flag = CALLSEQ_END(chain, 0, 0, flag) + // + Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL); + InFlag = Chain.getValue(1); - // Replace the target specific call node with the stackmap intrinsic. - SmallVector<SDValue, 8> Ops; + // Add the <id> and <numBytes> constants. + SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); + SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); - // Add the <id> and <numShadowBytes> constants. - for (unsigned i = 0; i < 2; ++i) { - SDValue tmp = getValue(CI.getOperand(i)); - Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); - } // Push live variables for the stack map. - for (unsigned i = 2, e = CI.getNumArgOperands(); i != e; ++i) - Ops.push_back(getValue(CI.getArgOperand(i))); + addStackMapLiveVars(CI, 2, Ops, *this); - // Push the chain (this is originally the first operand of the call, but - // becomes now the last or second to last operand). - Ops.push_back(*(Call->op_begin())); + // We are not pushing any register mask info here on the operands list, + // because the stackmap doesn't clobber anything. - // Push the glue flag (last operand). - if (hasGlue) - Ops.push_back(*(Call->op_end()-1)); + // Push the chain and the glue flag. + Ops.push_back(Chain); + Ops.push_back(InFlag); + // Create the STACKMAP node. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops); + Chain = SDValue(SM, 0); + InFlag = Chain.getValue(1); - // Replace the target specific call node with a STACKMAP node. - MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::STACKMAP, getCurSDLoc(), - NodeTys, Ops); + Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL); - // StackMap generates no value, so nothing goes in the NodeMap. + // Stackmaps don't generate values, so nothing goes into the NodeMap. - // Fixup the consumers of the intrinsic. The chain and glue may be used in the - // call sequence. - DAG.ReplaceAllUsesWith(Call, MN); + // Set the root to the target-lowered call chain. + DAG.setRoot(Chain); - DAG.DeleteNode(Call); + // Inform the Frame Information that we have a stackmap in this function. + FuncInfo.MF->getFrameInfo()->setHasStackMap(); } /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { - // void|i64 @llvm.experimental.patchpoint.void|i64(i32 <id>, + // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, // i32 <numBytes>, // i8* <target>, // i32 <numArgs>, @@ -6855,17 +6961,19 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { SDValue Callee = getValue(CI.getOperand(2)); // <target> // Get the real number of arguments participating in the call <numArgs> - unsigned NumArgs = - cast<ConstantSDNode>(getValue(CI.getArgOperand(3)))->getZExtValue(); + SDValue NArgVal = getValue(CI.getArgOperand(PatchPointOpers::NArgPos)); + unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> - assert(CI.getNumArgOperands() >= NumArgs + 4 && + // Intrinsics include all meta-operands up to but not including CC. + unsigned NumMetaOpers = PatchPointOpers::CCPos; + assert(CI.getNumArgOperands() >= NumMetaOpers + NumArgs && "Not enough arguments provided to the patchpoint intrinsic"); // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs; std::pair<SDValue, SDValue> Result = - LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC); + LowerCallOperands(CI, NumMetaOpers, NumCallArgs, Callee, isAnyRegCC); // Set the root to the target-lowered call chain. SDValue Chain = Result.second; @@ -6885,13 +6993,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Replace the target specific call node with the patchable intrinsic. SmallVector<SDValue, 8> Ops; - // Add the <id> and <numNopBytes> constants. - for (unsigned i = 0; i < 2; ++i) { - SDValue tmp = getValue(CI.getOperand(i)); - Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); - } + // Add the <id> and <numBytes> constants. + SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); + SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); + // Assume that the Callee is a constant address. + // FIXME: handle function symbols in the future. Ops.push_back( DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(), /*isTarget=*/true)); @@ -6909,25 +7020,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Add the arguments we omitted previously. The register allocator should // place these in any free register. if (isAnyRegCC) - for (unsigned i = 4, e = NumArgs + 4; i != e; ++i) + for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) Ops.push_back(getValue(CI.getArgOperand(i))); - // Push the arguments from the call instruction. + // Push the arguments from the call instruction up to the register mask. SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1; for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) Ops.push_back(*i); // Push live variables for the stack map. - for (unsigned i = NumArgs + 4, e = CI.getNumArgOperands(); i != e; ++i) { - SDValue OpVal = getValue(CI.getArgOperand(i)); - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { - Ops.push_back( - DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); - Ops.push_back( - DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); - } else - Ops.push_back(OpVal); - } + addStackMapLiveVars(CI, NumMetaOpers + NumArgs, Ops, *this); // Push the register mask info. if (hasGlue) @@ -6981,6 +7083,9 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { } else DAG.ReplaceAllUsesWith(Call, MN); DAG.DeleteNode(Call); + + // Inform the Frame Information that we have a patchpoint in this function. + FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); } /// TargetLowering::LowerCallTo - This is the default LowerCallTo @@ -7037,8 +7142,18 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setInReg(); if (Args[i].isSRet) Flags.setSRet(); - if (Args[i].isByVal) { + if (Args[i].isByVal) Flags.setByVal(); + if (Args[i].isInAlloca) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling + // in the various CC lowering callbacks. + Flags.setByVal(); + } + if (Args[i].isByVal || Args[i].isInAlloca) { PointerType *Ty = cast<PointerType>(Args[i].Ty); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); @@ -7202,12 +7317,10 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { return A->use_empty(); const BasicBlock *Entry = A->getParent()->begin(); - for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); - UI != E; ++UI) { - const User *U = *UI; + for (const User *U : A->users()) if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) return false; // Use not in entry block. - } + return true; } @@ -7215,7 +7328,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); const TargetLowering *TLI = getTargetLowering(); - const DataLayout *TD = TLI->getDataLayout(); + const DataLayout *DL = TLI->getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; if (!FuncInfo->CanLowerReturn) { @@ -7247,7 +7360,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = - TD->getABITypeAlignment(ArgTy); + DL->getABITypeAlignment(ArgTy); if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) Flags.setZExt(); @@ -7257,11 +7370,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setInReg(); if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) Flags.setSRet(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) { + if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) Flags.setByVal(); + if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling + // in the various CC lowering callbacks. + Flags.setByVal(); + } + if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast<PointerType>(I->getType()); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(TD->getTypeAllocSize(ElementTy)); + Flags.setByValSize(DL->getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; |