diff options
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 222 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 21 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 57 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.cpp | 2 |
4 files changed, 292 insertions, 10 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e90c9303f3..2df8af5e50 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -292,10 +292,12 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) if (!Subtarget->hasSSE2()) setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand); + // Expand certain atomics setOperationAction(ISD::ATOMIC_LCS , MVT::i8, Custom); setOperationAction(ISD::ATOMIC_LCS , MVT::i16, Custom); setOperationAction(ISD::ATOMIC_LCS , MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LCS , MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LSS , MVT::i32, Expand); // Use the default ISD::LOCATION, ISD::DECLARE expansion. setOperationAction(ISD::LOCATION, MVT::Other, Expand); @@ -5511,6 +5513,15 @@ SDNode* X86TargetLowering::ExpandATOMIC_LCS(SDNode* Op, SelectionDAG &DAG) { return DAG.getNode(ISD::MERGE_VALUES, Tys, ResultVal, cpOutH.getValue(1)).Val; } +SDNode* X86TargetLowering::ExpandATOMIC_LSS(SDNode* Op, SelectionDAG &DAG) { + MVT::ValueType T = cast<AtomicSDNode>(Op)->getVT(); + assert (T == MVT::i32 && "Only know how to expand i32 LSS"); + SDOperand negOp = DAG.getNode(ISD::SUB, T, + DAG.getConstant(0, T), Op->getOperand(2)); + return DAG.getAtomic(ISD::ATOMIC_LAS, Op->getOperand(0), + Op->getOperand(1), negOp, T).Val; +} + /// LowerOperation - Provide custom lowering hooks for some operations. /// SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { @@ -5568,6 +5579,7 @@ SDNode *X86TargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) { case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG); case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG); case ISD::ATOMIC_LCS: return ExpandATOMIC_LCS(N, DAG); + case ISD::ATOMIC_LSS: return ExpandATOMIC_LSS(N,DAG); } } @@ -5732,6 +5744,187 @@ X86TargetLowering::isVectorClearMaskLegal(const std::vector<SDOperand> &BVOps, // X86 Scheduler Hooks //===----------------------------------------------------------------------===// +// private utility function +MachineBasicBlock * +X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, + MachineBasicBlock *MBB, + unsigned regOpc, + unsigned immOpc) { + // For the atomic bitwise operator, we generate + // thisMBB: + // newMBB: + // ld EAX = [bitinstr.addr] + // mov t1 = EAX + // op t2 = t1, [bitinstr.val] + // lcs dest = [bitinstr.addr], t2 [EAX is implicit] + // bz newMBB + // fallthrough -->nextMBB + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + ilist<MachineBasicBlock>::iterator MBBIter = MBB; + ++MBBIter; + + /// First build the CFG + MachineFunction *F = MBB->getParent(); + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *newMBB = new MachineBasicBlock(LLVM_BB); + MachineBasicBlock *nextMBB = new MachineBasicBlock(LLVM_BB); + F->getBasicBlockList().insert(MBBIter, newMBB); + F->getBasicBlockList().insert(MBBIter, nextMBB); + + // Move all successors to thisMBB to nextMBB + nextMBB->transferSuccessors(thisMBB); + + // Update thisMBB to fall through to newMBB + thisMBB->addSuccessor(newMBB); + + // newMBB jumps to itself and fall through to nextMBB + newMBB->addSuccessor(nextMBB); + newMBB->addSuccessor(newMBB); + + // Insert instructions into newMBB based on incoming instruction + assert(bInstr->getNumOperands() < 8 && "unexpected number of operands"); + MachineOperand& destOper = bInstr->getOperand(0); + MachineOperand* argOpers[6]; + int numArgs = bInstr->getNumOperands() - 1; + for (int i=0; i < numArgs; ++i) + argOpers[i] = &bInstr->getOperand(i+1); + + // x86 address has 4 operands: base, index, scale, and displacement + int lastAddrIndx = 3; // [0,3] + int valArgIndx = 4; + + MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), X86::EAX); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + + unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t1); + MIB.addReg(X86::EAX); + + unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + assert( (argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm()) + && "invalid operand"); + if (argOpers[valArgIndx]->isReg()) + MIB = BuildMI(newMBB, TII->get(regOpc), t2); + else + MIB = BuildMI(newMBB, TII->get(immOpc), t2); + MIB.addReg(t1); + (*MIB).addOperand(*argOpers[valArgIndx]); + + MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32)); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + MIB.addReg(t2); + + MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg()); + MIB.addReg(X86::EAX); + + // insert branch + BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB); + + delete bInstr; // The pseudo instruction is gone now. + return nextMBB; +} + +// private utility function +MachineBasicBlock * +X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, + MachineBasicBlock *MBB, + unsigned cmovOpc) { + // For the atomic min/max operator, we generate + // thisMBB: + // newMBB: + // ld EAX = [min/max.addr] + // mov t1 = EAX + // mov t2 = [min/max.val] + // cmp t1, t2 + // cmov[cond] t2 = t1 + // lcs dest = [bitinstr.addr], t2 [EAX is implicit] + // bz newMBB + // fallthrough -->nextMBB + // + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + ilist<MachineBasicBlock>::iterator MBBIter = MBB; + ++MBBIter; + + /// First build the CFG + MachineFunction *F = MBB->getParent(); + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *newMBB = new MachineBasicBlock(LLVM_BB); + MachineBasicBlock *nextMBB = new MachineBasicBlock(LLVM_BB); + F->getBasicBlockList().insert(MBBIter, newMBB); + F->getBasicBlockList().insert(MBBIter, nextMBB); + + // Move all successors to thisMBB to nextMBB + nextMBB->transferSuccessors(thisMBB); + + // Update thisMBB to fall through to newMBB + thisMBB->addSuccessor(newMBB); + + // newMBB jumps to newMBB and fall through to nextMBB + newMBB->addSuccessor(nextMBB); + newMBB->addSuccessor(newMBB); + + // Insert instructions into newMBB based on incoming instruction + assert(mInstr->getNumOperands() < 8 && "unexpected number of operands"); + MachineOperand& destOper = mInstr->getOperand(0); + MachineOperand* argOpers[6]; + int numArgs = mInstr->getNumOperands() - 1; + for (int i=0; i < numArgs; ++i) + argOpers[i] = &mInstr->getOperand(i+1); + + // x86 address has 4 operands: base, index, scale, and displacement + int lastAddrIndx = 3; // [0,3] + int valArgIndx = 4; + + MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), X86::EAX); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + + unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t1); + MIB.addReg(X86::EAX); + + // We only support register and immediate values + assert( (argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm()) + && "invalid operand"); + + unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + if (argOpers[valArgIndx]->isReg()) + MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2); + else + MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2); + (*MIB).addOperand(*argOpers[valArgIndx]); + + MIB = BuildMI(newMBB, TII->get(X86::CMP32rr)); + MIB.addReg(t1); + MIB.addReg(t2); + + // Generate movc + unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + MIB = BuildMI(newMBB, TII->get(cmovOpc),t3); + MIB.addReg(t2); + MIB.addReg(t1); + + // Cmp and exchange if none has modified the memory location + MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32)); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + MIB.addReg(t3); + + MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg()); + MIB.addReg(X86::EAX); + + // insert branch + BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB); + + delete mInstr; // The pseudo instruction is gone now. + return nextMBB; +} + + MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) { @@ -5766,15 +5959,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); F->getBasicBlockList().insert(It, copy0MBB); F->getBasicBlockList().insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current + // Update machine-CFG edges by transferring all successors of the current // block to the new block which will contain the Phi node for the select. - for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) - sinkMBB->addSuccessor(*i); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while(!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + sinkMBB->transferSuccessors(BB); + + // Add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); @@ -5874,6 +6063,23 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, delete MI; // The pseudo instruction is gone now. return BB; } + case X86::ATOMAND32: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, + X86::AND32ri); + case X86::ATOMOR32: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr, + X86::OR32ri); + case X86::ATOMXOR32: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr, + X86::XOR32ri); + case X86::ATOMMIN32: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr); + case X86::ATOMMAX32: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr); + case X86::ATOMUMIN32: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr); + case X86::ATOMUMAX32: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr); } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 55ad70e642..287903913e 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -346,6 +346,7 @@ namespace llvm { virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB); + /// getTargetNodeName - This method returns the name of a target specific /// DAG node. virtual const char *getTargetNodeName(unsigned Opcode) const; @@ -524,7 +525,8 @@ namespace llvm { SDNode *ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG); SDNode *ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG); SDNode *ExpandATOMIC_LCS(SDNode *N, SelectionDAG &DAG); - + SDNode *ExpandATOMIC_LSS(SDNode *N, SelectionDAG &DAG); + SDOperand EmitTargetCodeForMemset(SelectionDAG &DAG, SDOperand Chain, SDOperand Dst, SDOperand Src, @@ -537,6 +539,23 @@ namespace llvm { bool AlwaysInline, const Value *DstSV, uint64_t DstSVOff, const Value *SrcSV, uint64_t SrcSVOff); + + /// Utility function to emit atomic bitwise operations (and, or, xor). + // It takes the bitwise instruction to expand, the associated machine basic + // block, and the associated X86 opcodes for reg/reg and reg/imm. + MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter( + MachineInstr *BInstr, + MachineBasicBlock *BB, + unsigned regOpc, + unsigned immOpc); + + /// Utility function to emit atomic min and max. It takes the min/max + // instruction to expand, the associated basic block, and the associated + // cmov opcode for moving the min or max value. + MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr, + MachineBasicBlock *BB, + unsigned cmovOpc); + }; } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index c539eca940..af61540361 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -2598,6 +2598,63 @@ def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val), TB, LOCK; } +// Atomic exchange and and, or, xor +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomDAGSchedInserter = 1 in { +def ATOMAND32 : I<0xC1, MRMSrcMem,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMAND32 PSUEDO!", + [(set GR32:$dst, (atomic_load_and addr:$ptr, GR32:$val))]>, + TB, LOCK; +} + +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomDAGSchedInserter = 1 in { +def ATOMOR32 : I<0xC1, MRMSrcMem, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMOR32 PSUEDO!", + [(set GR32:$dst, (atomic_load_or addr:$ptr, GR32:$val))]>, + TB, LOCK; +} + +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomDAGSchedInserter = 1 in { +def ATOMXOR32 : I<0xC1, MRMSrcMem,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMXOR32 PSUEDO!", + [(set GR32:$dst, (atomic_load_xor addr:$ptr, GR32:$val))]>, + TB, LOCK; +} + +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomDAGSchedInserter = 1 in { +def ATOMMIN32: I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), + "#ATOMMIN32 PSUEDO!", + [(set GR32:$dst, (atomic_load_min addr:$ptr, GR32:$val))]>, + TB, LOCK; +} + +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomDAGSchedInserter = 1 in { +def ATOMMAX32: I<0xC1, MRMSrcMem, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMMAX32 PSUEDO!", + [(set GR32:$dst, (atomic_load_max addr:$ptr, GR32:$val))]>, + TB, LOCK; +} + +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomDAGSchedInserter = 1 in { +def ATOMUMIN32: I<0xC1, MRMSrcMem,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMUMIN32 PSUEDO!", + [(set GR32:$dst, (atomic_load_umin addr:$ptr, GR32:$val))]>, + TB, LOCK; +} + +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomDAGSchedInserter = 1 in { +def ATOMUMAX32: I<0xC1, MRMSrcMem,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMUMAX32 PSUEDO!", + [(set GR32:$dst, (atomic_load_umax addr:$ptr, GR32:$val))]>, + TB, LOCK; +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 2dd7b45974..c0040779a9 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -247,7 +247,7 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit) , MaxInlineSizeThreshold(128) , Is64Bit(is64Bit) , TargetType(isELF) { // Default to ELF unless otherwise specified. - + // Determine default and user specified characteristics if (!FS.empty()) { // If feature string is not empty, parse features string. |