diff options
author | Dale Johannesen <dalej@apple.com> | 2010-09-30 23:57:10 +0000 |
---|---|---|
committer | Dale Johannesen <dalej@apple.com> | 2010-09-30 23:57:10 +0000 |
commit | 0488fb649a56b7fc89a5814df5308813f9e5a85d (patch) | |
tree | 21913bb81960866b73c6b49f29782c142563304c /lib/Target | |
parent | a7e3b564773cc45a1b08383c09fe86b17b3ffe92 (diff) | |
download | external_llvm-0488fb649a56b7fc89a5814df5308813f9e5a85d.tar.gz external_llvm-0488fb649a56b7fc89a5814df5308813f9e5a85d.tar.bz2 external_llvm-0488fb649a56b7fc89a5814df5308813f9e5a85d.zip |
Massive rewrite of MMX:
The x86_mmx type is used for MMX intrinsics, parameters and
return values where these use MMX registers, and is also
supported in load, store, and bitcast.
Only the above operations generate MMX instructions, and optimizations
do not operate on or produce MMX intrinsics.
MMX-sized vectors <2 x i32> etc. are lowered to XMM or split into
smaller pieces. Optimizations may occur on these forms and the
result casted back to x86_mmx, provided the result feeds into a
previous existing x86_mmx operation.
The point of all this is prevent optimizations from introducing
MMX operations, which is unsafe due to the EMMS problem.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@115243 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/X86/X86CallingConv.td | 19 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 271 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 10 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 51 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrMMX.td | 562 | ||||
-rw-r--r-- | lib/Target/X86/X86MCInstLower.cpp | 3 | ||||
-rw-r--r-- | lib/Target/X86/X86RegisterInfo.td | 2 |
7 files changed, 173 insertions, 745 deletions
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index e3409effc3..487e39f8c2 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -48,7 +48,7 @@ def RetCC_X86Common : CallingConv<[ // MMX vector types are always returned in MM0. If the target doesn't have // MM0, it doesn't support these vector types. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>, + CCIfType<[x86mmx, v1i64], CCAssignToReg<[MM0]>>, // Long double types are always returned in ST0 (even with SSE). CCIfType<[f80], CCAssignToReg<[ST0, ST1]>> @@ -95,14 +95,14 @@ def RetCC_X86_64_C : CallingConv<[ // returned in RAX. This disagrees with ABI documentation but is bug // compatible with gcc. CCIfType<[v1i64], CCAssignToReg<[RAX]>>, - CCIfType<[v8i8, v4i16, v2i32], CCAssignToReg<[XMM0, XMM1]>>, + CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>, CCDelegateTo<RetCC_X86Common> ]>; // X86-Win64 C return-value convention. def RetCC_X86_Win64_C : CallingConv<[ // The X86-Win64 calling convention always returns __m64 values in RAX. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCBitConvertToType<i64>>, + CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>, // And FP in XMM0 only. CCIfType<[f32], CCAssignToReg<[XMM0]>>, @@ -161,7 +161,7 @@ def CC_X86_64_C : CallingConv<[ // The first 8 MMX (except for v1i64) vector arguments are passed in XMM // registers on Darwin. - CCIfType<[v8i8, v4i16, v2i32], + CCIfType<[x86mmx], CCIfSubtarget<"isTargetDarwin()", CCIfSubtarget<"hasSSE2()", CCPromoteToType<v2i64>>>>, @@ -192,7 +192,7 @@ def CC_X86_64_C : CallingConv<[ CCAssignToStack<32, 32>>, // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> + CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>> ]>; // Calling convention used on Win64 @@ -210,8 +210,7 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>, // The first 4 MMX vector arguments are passed in GPRs. - CCIfType<[v8i8, v4i16, v2i32, v1i64], - CCBitConvertToType<i64>>, + CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>, // The first 4 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ], @@ -233,7 +232,7 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[f80], CCAssignToStack<0, 0>>, // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> + CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>> ]>; def CC_X86_64_GHC : CallingConv<[ @@ -269,7 +268,7 @@ def CC_X86_32_Common : CallingConv<[ // The first 3 __m64 (except for v1i64) vector arguments are passed in mmx // registers if the call is not a vararg call. - CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32], + CCIfNotVarArg<CCIfType<[x86mmx], CCAssignToReg<[MM0, MM1, MM2]>>>, // Integer/Float values get stored in stack slots that are 4 bytes in @@ -300,7 +299,7 @@ def CC_X86_32_Common : CallingConv<[ // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are // passed in the parameter area. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 4>>]>; + CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 4>>]>; def CC_X86_32_C : CallingConv<[ // Promote i8/i16 arguments to i32. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 55daf769cf..7da9ad6da2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -218,11 +218,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); if (Subtarget->is64Bit()) { setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand); - // Without SSE, i64->f64 goes through memory; i64->MMX is Legal. - if (Subtarget->hasMMX() && !DisableMMX) - setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Custom); - else - setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand); + // Without SSE, i64->f64 goes through memory. + setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand); } } @@ -615,90 +612,40 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // with -msoft-float, disable use of MMX as well. if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) { addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass, false); - - // FIXME: Remove the rest of this stuff. - addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false); - addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false); - addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false); - - addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false); - - setOperationAction(ISD::ADD, MVT::v8i8, Legal); - setOperationAction(ISD::ADD, MVT::v4i16, Legal); - setOperationAction(ISD::ADD, MVT::v2i32, Legal); - setOperationAction(ISD::ADD, MVT::v1i64, Legal); - - setOperationAction(ISD::SUB, MVT::v8i8, Legal); - setOperationAction(ISD::SUB, MVT::v4i16, Legal); - setOperationAction(ISD::SUB, MVT::v2i32, Legal); - setOperationAction(ISD::SUB, MVT::v1i64, Legal); - - setOperationAction(ISD::MULHS, MVT::v4i16, Legal); - setOperationAction(ISD::MUL, MVT::v4i16, Legal); - - setOperationAction(ISD::AND, MVT::v8i8, Promote); - AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64); - setOperationAction(ISD::AND, MVT::v4i16, Promote); - AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64); - setOperationAction(ISD::AND, MVT::v2i32, Promote); - AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::AND, MVT::v1i64, Legal); - - setOperationAction(ISD::OR, MVT::v8i8, Promote); - AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64); - setOperationAction(ISD::OR, MVT::v4i16, Promote); - AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64); - setOperationAction(ISD::OR, MVT::v2i32, Promote); - AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::OR, MVT::v1i64, Legal); - - setOperationAction(ISD::XOR, MVT::v8i8, Promote); - AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64); - setOperationAction(ISD::XOR, MVT::v4i16, Promote); - AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64); - setOperationAction(ISD::XOR, MVT::v2i32, Promote); - AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::XOR, MVT::v1i64, Legal); - - setOperationAction(ISD::LOAD, MVT::v8i8, Promote); - AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64); - setOperationAction(ISD::LOAD, MVT::v4i16, Promote); - AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64); - setOperationAction(ISD::LOAD, MVT::v2i32, Promote); - AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::LOAD, MVT::v1i64, Legal); - - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); - - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); - - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom); - - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom); - - setOperationAction(ISD::SELECT, MVT::v8i8, Promote); - setOperationAction(ISD::SELECT, MVT::v4i16, Promote); - setOperationAction(ISD::SELECT, MVT::v2i32, Promote); - setOperationAction(ISD::SELECT, MVT::v1i64, Custom); - setOperationAction(ISD::VSETCC, MVT::v8i8, Custom); - setOperationAction(ISD::VSETCC, MVT::v4i16, Custom); - setOperationAction(ISD::VSETCC, MVT::v2i32, Custom); - - if (!X86ScalarSSEf64 && Subtarget->is64Bit()) { - setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom); - } - } + // No operations on x86mmx supported, everything uses intrinsics. + } + + // MMX-sized vectors (other than x86mmx) are expected to be expanded + // into smaller operations. + setOperationAction(ISD::MULHS, MVT::v8i8, Expand); + setOperationAction(ISD::MULHS, MVT::v4i16, Expand); + setOperationAction(ISD::MULHS, MVT::v2i32, Expand); + setOperationAction(ISD::MULHS, MVT::v1i64, Expand); + setOperationAction(ISD::AND, MVT::v8i8, Expand); + setOperationAction(ISD::AND, MVT::v4i16, Expand); + setOperationAction(ISD::AND, MVT::v2i32, Expand); + setOperationAction(ISD::AND, MVT::v1i64, Expand); + setOperationAction(ISD::OR, MVT::v8i8, Expand); + setOperationAction(ISD::OR, MVT::v4i16, Expand); + setOperationAction(ISD::OR, MVT::v2i32, Expand); + setOperationAction(ISD::OR, MVT::v1i64, Expand); + setOperationAction(ISD::XOR, MVT::v8i8, Expand); + setOperationAction(ISD::XOR, MVT::v4i16, Expand); + setOperationAction(ISD::XOR, MVT::v2i32, Expand); + setOperationAction(ISD::XOR, MVT::v1i64, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand); + setOperationAction(ISD::SELECT, MVT::v8i8, Expand); + setOperationAction(ISD::SELECT, MVT::v4i16, Expand); + setOperationAction(ISD::SELECT, MVT::v2i32, Expand); + setOperationAction(ISD::SELECT, MVT::v1i64, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Expand); if (!UseSoftFloat && Subtarget->hasSSE1()) { addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); @@ -821,10 +768,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); - if (!DisableMMX && Subtarget->hasMMX()) { - setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); - } } if (Subtarget->hasSSE41()) { @@ -1210,8 +1153,7 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{ RRC = (Subtarget->is64Bit() ? X86::GR64RegisterClass : X86::GR32RegisterClass); break; - case MVT::v8i8: case MVT::v4i16: - case MVT::v2i32: case MVT::v1i64: + case MVT::x86mmx: RRC = X86::VR64RegisterClass; break; case MVT::f32: case MVT::f64: @@ -1345,12 +1287,11 @@ X86TargetLowering::LowerReturn(SDValue Chain, // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64 // which is returned in RAX / RDX. if (Subtarget->is64Bit()) { - if (ValVT.isVector() && ValVT.getSizeInBits() == 64) { - ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy); + if (ValVT == MVT::x86mmx) { if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { + ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy); ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy); - // If we don't have SSE2 available, convert to v4f32 so the generated // register is legal. if (!Subtarget->hasSSE2()) @@ -1650,7 +1591,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, RC = X86::VR256RegisterClass; else if (RegVT.isVector() && RegVT.getSizeInBits() == 128) RC = X86::VR128RegisterClass; - else if (RegVT.isVector() && RegVT.getSizeInBits() == 64) + else if (RegVT == MVT::x86mmx) RC = X86::VR64RegisterClass; else llvm_unreachable("Unknown argument type!"); @@ -1673,9 +1614,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (VA.isExtInLoc()) { // Handle MMX values passed in XMM regs. if (RegVT.isVector()) { - ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, - ArgValue, DAG.getConstant(0, MVT::i64)); - ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); + ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), + ArgValue); } else ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); } @@ -2876,7 +2816,7 @@ static bool isUndefOrEqual(int Val, int CmpVal) { /// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference /// the second operand. static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) { - if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16) + if (VT == MVT::v4f32 || VT == MVT::v4i32 ) return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4); if (VT == MVT::v2f64 || VT == MVT::v2i64) return (Mask[0] < 2 && Mask[1] < 2); @@ -3548,13 +3488,10 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted + // Always build SSE zero vectors as <4 x i32> bitcasted // to their dest type. This ensures they get CSE'd. SDValue Vec; - if (VT.getSizeInBits() == 64) { // MMX - SDValue Cst = DAG.getTargetConstant(0, MVT::i32); - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); - } else if (VT.getSizeInBits() == 128) { + if (VT.getSizeInBits() == 128) { // SSE if (HasSSE2) { // SSE2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); @@ -3582,10 +3519,7 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { // type. This ensures they get CSE'd. SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); SDValue Vec; - if (VT.getSizeInBits() == 64) // MMX - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); - else // SSE - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); } @@ -4025,8 +3959,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, SelectionDAG &DAG, const TargetLowering &TLI, DebugLoc dl) { - bool isMMX = VT.getSizeInBits() == 64; - EVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64; + EVT ShVT = MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL; SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, @@ -4180,10 +4113,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ISD::isBuildVectorAllZeros(Op.getNode()) || (Op.getValueType().getSizeInBits() != 256 && ISD::isBuildVectorAllOnes(Op.getNode()))) { - // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to + // Canonicalize this to <4 x i32> (SSE) to // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are // eliminated on x86-32 hosts. - if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32) + if (Op.getValueType() == MVT::v4i32) return Op; if (ISD::isBuildVectorAllOnes(Op.getNode())) @@ -4234,9 +4167,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ExtVT == MVT::i64 && !Subtarget->is64Bit() && (!IsAllConstants || Idx == 0)) { if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) { - // Handle MMX and SSE both. - EVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32; - unsigned VecElts = VT == MVT::v2i64 ? 4 : 2; + // Handle SSE only. + assert(VT == MVT::v2i64 && "Expected an SSE value type!"); + EVT VecVT = MVT::v4i32; + unsigned VecElts = 4; // Truncate the value (which may itself be a constant) to i32, and // convert it to a vector with movd (S2V+shuffle to zero extend). @@ -4275,7 +4209,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DAG); } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); - EVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32; + assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!"); + EVT MiddleVT = MVT::v4i32; Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(), DAG); @@ -5418,11 +5353,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); - // FIXME: this is somehow handled during isel by MMX pattern fragments. Remove - // the check or come up with another solution when all MMX move to intrinsics, - // but don't allow this to be considered legal, we don't want vector_shuffle - // operations to be matched during isel anymore. - if (isMMX && SVOp->isSplat()) + // Shuffle operations on MMX not supported. + if (isMMX) return Op; // Vector shuffle lowering takes 3 steps: @@ -5456,10 +5388,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef && - RelaxedMayFoldVectorLoad(V1) && !isMMX) + RelaxedMayFoldVectorLoad(V1)) return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG); - if (!isMMX && X86::isMOVHLPS_v_undef_Mask(SVOp)) + if (X86::isMOVHLPS_v_undef_Mask(SVOp)) return getMOVHighToLow(Op, dl, DAG); // Use to match splats @@ -5507,7 +5439,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return V2; if (ISD::isBuildVectorAllZeros(V1.getNode())) return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl); - if (!isMMX && !X86::isMOVLPMask(SVOp)) { + if (!X86::isMOVLPMask(SVOp)) { if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64)) return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG); @@ -5517,22 +5449,20 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } // FIXME: fold these into legal mask. - if (!isMMX) { - if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) - return getMOVLowToHigh(Op, dl, DAG, HasSSE2); + if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) + return getMOVLowToHigh(Op, dl, DAG, HasSSE2); - if (X86::isMOVHLPSMask(SVOp)) - return getMOVHighToLow(Op, dl, DAG); + if (X86::isMOVHLPSMask(SVOp)) + return getMOVHighToLow(Op, dl, DAG); - if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) - return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG); + if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) + return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG); - if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) - return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG); + if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) + return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG); - if (X86::isMOVLPMask(SVOp)) - return getMOVLP(Op, dl, DAG, HasSSE2); - } + if (X86::isMOVLPMask(SVOp)) + return getMOVLP(Op, dl, DAG, HasSSE2); if (ShouldXformToMOVHLPS(SVOp) || ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp)) @@ -5573,12 +5503,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } if (X86::isUNPCKLMask(SVOp)) - return (isMMX) ? - Op : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); if (X86::isUNPCKHMask(SVOp)) - return (isMMX) ? - Op : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first @@ -5602,18 +5530,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); if (X86::isUNPCKLMask(NewSVOp)) - return (isMMX) ? - NewOp : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); if (X86::isUNPCKHMask(NewSVOp)) - return (isMMX) ? - NewOp : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); } - // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle. - // Normalize the node to match x86 shuffle ops if needed - if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) + if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) return CommuteVectorShuffle(SVOp, DAG); // The checks below are all present in isShuffleMaskLegal, but they are @@ -5627,12 +5551,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { X86::getShufflePALIGNRImmediate(SVOp), DAG); - // Only a few shuffle masks are handled for 64-bit vectors (MMX), and - // 64-bit vectors which made to this point can't be handled, they are - // expanded. - if (isMMX) - return SDValue(); - if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && SVOp->getSplatIndex() == 0 && V2IsUndef) { if (VT == MVT::v2f64) @@ -5681,8 +5599,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return NewOp; } - // Handle all 4 wide cases with a number of shuffles except for MMX. - if (NumElems == 4 && !isMMX) + // Handle all 4 wide cases with a number of shuffles. + if (NumElems == 4) return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG); return SDValue(); @@ -5824,8 +5742,6 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, unsigned Opc; if (VT == MVT::v8i16) Opc = X86ISD::PINSRW; - else if (VT == MVT::v4i16) - Opc = X86ISD::MMX_PINSRW; else if (VT == MVT::v16i8) Opc = X86ISD::PINSRB; else @@ -5881,8 +5797,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); if (N2.getValueType() != MVT::i32) N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue()); - return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW, - dl, VT, N0, N1, N2); + return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2); } return SDValue(); } @@ -5896,16 +5811,10 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0)); SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); - EVT VT = MVT::v2i32; - switch (Op.getValueType().getSimpleVT().SimpleTy) { - default: break; - case MVT::v16i8: - case MVT::v8i16: - VT = MVT::v4i32; - break; - } + assert(Op.getValueType().getSimpleVT().getSizeInBits() == 128 && + "Expected an SSE type!"); return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, AnyExt)); + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt)); } // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as @@ -6322,11 +6231,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { EVT SrcVT = Op.getOperand(0).getValueType(); - if (SrcVT.isVector()) { - if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) - return Op; + if (SrcVT.isVector()) return SDValue(); - } assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 && "Unknown SINT_TO_FP to lower!"); @@ -6702,13 +6608,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { - if (Op.getValueType().isVector()) { - if (Op.getValueType() == MVT::v2i32 && - Op.getOperand(0).getValueType() == MVT::v2f64) { - return Op; - } + if (Op.getValueType().isVector()) return SDValue(); - } std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true); SDValue FIST = Vals.first, StackSlot = Vals.second; @@ -7211,11 +7112,8 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { switch (VT.getSimpleVT().SimpleTy) { default: break; - case MVT::v8i8: case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break; - case MVT::v4i16: case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break; - case MVT::v2i32: case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break; case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break; } @@ -7930,6 +7828,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4); } else { ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); +// FIXME this must be lowered to get rid of the invalid type. } EVT VT = Op.getValueType(); @@ -8840,7 +8739,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; case X86ISD::PINSRB: return "X86ISD::PINSRB"; case X86ISD::PINSRW: return "X86ISD::PINSRW"; - case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW"; case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; @@ -9711,7 +9609,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::TLSCall_64: return EmitLoweredTLSCall(MI, BB); case X86::CMOV_GR8: - case X86::CMOV_V1I64: case X86::CMOV_FR32: case X86::CMOV_FR64: case X86::CMOV_V4F32: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 2a86fa8c70..1a2da7401b 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -128,11 +128,15 @@ namespace llvm { /// relative displacements. WrapperRIP, - /// MOVQ2DQ - Copies a 64-bit value from a vector to another vector. - /// Can be used to move a vector value from a MMX register to a XMM - /// register. + /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word + /// of an XMM vector, with the high word zero filled. MOVQ2DQ, + /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector + /// to an MMX vector. If you think this is too close to the previous + /// mnemonic, so do I; blame Intel. + MOVDQ2Q, + /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to /// i32, corresponds to X86::PEXTRB. PEXTRB, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 104f880032..70c3d076d5 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -15,51 +15,8 @@ // MMX Pattern Fragments //===----------------------------------------------------------------------===// -def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>; - -def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>; -def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>; -def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>; -def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>; - -//===----------------------------------------------------------------------===// -// MMX Masks -//===----------------------------------------------------------------------===// - -// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to -// PSHUFW imm. -def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShuffleSHUFImmediate(N)); -}]>; - -// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...> -def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...> -def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> -def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> -def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); -}], MMX_SHUFFLE_get_shuf_imm>; +def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>; +def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>; //===----------------------------------------------------------------------===// // SSE specific DAG Nodes. @@ -286,9 +243,7 @@ def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast<LoadSDNode>(N)->getAlignment() >= 8; }]>; -def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>; -def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>; -def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>; +def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>; // MOVNT Support // Like 'store', but requires the non-temporal bit to be set diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index b9416bee25..a92776156b 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -21,28 +21,7 @@ //===----------------------------------------------------------------------===// let Constraints = "$src1 = $dst" in { - // MMXI_binop_rm - Simple MMX binary operator based on llvm operator. - multiclass MMXI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, bit Commutable = 0> { - def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (OpVT (OpNode VR64:$src1, VR64:$src2)))]> { - let isCommutable = Commutable; - } - def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (OpVT (OpNode VR64:$src1, - (bitconvert - (load_mmx addr:$src2)))))]>; - } - - // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic, with a - // different name for the generated instructions than MMXI_binop_rm uses. - // Thus int and rm can coexist for different implementations of the same - // instruction. This is temporary during transition to intrinsic-only - // implementation; eventually the non-intrinsic forms will go away. When + // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic. // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp. multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, bit Commutable = 0> { @@ -59,26 +38,6 @@ let Constraints = "$src1 = $dst" in { (bitconvert (load_mmx addr:$src2))))]>; } - // MMXI_binop_rm_v1i64 - Simple MMX binary operator whose type is v1i64. - // - // FIXME: we could eliminate this and use MMXI_binop_rm instead if tblgen knew - // to collapse (bitconvert VT to VT) into its operand. - // - multiclass MMXI_binop_rm_v1i64<bits<8> opc, string OpcodeStr, SDNode OpNode, - bit Commutable = 0> { - def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (v1i64 (OpNode VR64:$src1, VR64:$src2)))]> { - let isCommutable = Commutable; - } - def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, - (OpNode VR64:$src1,(load_mmx addr:$src2)))]>; - } - multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, string OpcodeStr, Intrinsic IntId, Intrinsic IntId2> { @@ -100,7 +59,7 @@ let Constraints = "$src1 = $dst" in { /// Unary MMX instructions requiring SSSE3. multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr, - PatFrag mem_frag64, Intrinsic IntId64> { + Intrinsic IntId64> { def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, (IntId64 VR64:$src))]>; @@ -108,13 +67,13 @@ multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr, def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, - (IntId64 (bitconvert (mem_frag64 addr:$src))))]>; + (IntId64 (bitconvert (memopmmx addr:$src))))]>; } /// Binary MMX instructions requiring SSSE3. let ImmT = NoImm, Constraints = "$src1 = $dst" in { multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, - PatFrag mem_frag64, Intrinsic IntId64> { + Intrinsic IntId64> { let isCommutable = 0 in def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), @@ -125,18 +84,12 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId64 VR64:$src1, - (bitconvert (mem_frag64 addr:$src2))))]>; + (bitconvert (memopmmx addr:$src2))))]>; } } /// PALIGN MMX instructions (require SSSE3). multiclass ssse3_palign_mm<string asm, Intrinsic IntId> { - def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2, i8imm:$src3), - !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; - def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2, i8imm:$src3), - !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), @@ -184,12 +137,12 @@ def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v2i32 (scalar_to_vector GR32:$src)))]>; -let canFoldAsLoad = 1, isReMaterializable = 1 in + (x86mmx (scalar_to_vector GR32:$src)))]>; +let canFoldAsLoad = 1 in def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>; + (x86mmx (scalar_to_vector (loadi32 addr:$src))))]>; let mayStore = 1 in def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; @@ -201,42 +154,41 @@ def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; -let neverHasSideEffects = 1 in // These are 64 bit moves, but since the OS X assembler doesn't // recognize a register-register movq, we write them as // movd. def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), - "movd\t{$src, $dst|$dst, $src}", []>; + "movd\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, + (bitconvert VR64:$src))]>; def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v1i64 (scalar_to_vector GR64:$src)))]>; - + (bitconvert GR64:$src))]>; let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1 in def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (load_mmx addr:$src))]>; def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movq\t{$src, $dst|$dst, $src}", - [(store (v1i64 VR64:$src), addr:$dst)]>; + [(store (x86mmx VR64:$src), addr:$dst)]>; def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v1i64 (bitconvert + (x86mmx (bitconvert (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))))))]>; def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (movl immAllZerosV, - (v2i64 (scalar_to_vector - (i64 (bitconvert (v1i64 VR64:$src)))))))]>; + (v2i64 (scalar_to_vector + (i64 (bitconvert (x86mmx VR64:$src))))))]>; let neverHasSideEffects = 1 in def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src), @@ -254,54 +206,40 @@ let AddedComplexity = 15 in def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>; + (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))]>; let AddedComplexity = 20 in def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v2i32 (X86vzmovl (v2i32 + (x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))))]>; // Arithmetic Instructions -defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", memopv8i8, - int_x86_ssse3_pabs_b>; -defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", memopv4i16, - int_x86_ssse3_pabs_w>; -defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", memopv2i32, - int_x86_ssse3_pabs_d>; +defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b>; +defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w>; +defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d>; // -- Addition -defm MMX_PADDB : MMXI_binop_rm<0xFC, "paddb", add, v8i8, 1>, - MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, 1>; -defm MMX_PADDW : MMXI_binop_rm<0xFD, "paddw", add, v4i16, 1>, - MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, 1>; -defm MMX_PADDD : MMXI_binop_rm<0xFE, "paddd", add, v2i32, 1>, - MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, 1>; -defm MMX_PADDQ : MMXI_binop_rm<0xD4, "paddq", add, v1i64, 1>, - MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, 1>; +defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, 1>; +defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, 1>; +defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, 1>; +defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, 1>; defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>; defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>; defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>; defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>; -defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", memopv4i16, - int_x86_ssse3_phadd_w>; -defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", memopv2i32, - int_x86_ssse3_phadd_d>; -defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw", memopv4i16, - int_x86_ssse3_phadd_sw>; +defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w>; +defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d>; +defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw>; // -- Subtraction -defm MMX_PSUBB : MMXI_binop_rm<0xF8, "psubb", sub, v8i8>, - MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b>; -defm MMX_PSUBW : MMXI_binop_rm<0xF9, "psubw", sub, v4i16>, - MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w>; -defm MMX_PSUBD : MMXI_binop_rm<0xFA, "psubd", sub, v2i32>, - MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d>; -defm MMX_PSUBQ : MMXI_binop_rm<0xFB, "psubq", sub, v1i64>, - MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q>; +defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b>; +defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w>; +defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d>; +defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q>; defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>; defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>; @@ -309,28 +247,24 @@ defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>; defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>; defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>; -defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", memopv4i16, - int_x86_ssse3_phsub_w>; -defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", memopv2i32, - int_x86_ssse3_phsub_d>; -defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw", memopv4i16, - int_x86_ssse3_phsub_sw>; +defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w>; +defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d>; +defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw>; // -- Multiplication -defm MMX_PMULLW : MMXI_binop_rm<0xD5, "pmullw", mul, v4i16, 1>, - MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, 1>; +defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, 1>; defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, 1>; defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>; defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>; let isCommutable = 1 in -defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw", memopv4i16, +defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw", int_x86_ssse3_pmul_hr_sw>; // -- Miscellanea defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>; -defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw", memopv8i8, +defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw", int_x86_ssse3_pmadd_ub_sw>; defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>; defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>; @@ -343,57 +277,18 @@ defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, 1>; defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>; -defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", memopv8i8, - int_x86_ssse3_psign_b>; -defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", memopv4i16, - int_x86_ssse3_psign_w>; -defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", memopv2i32, - int_x86_ssse3_psign_d>; +defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b>; +defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w>; +defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d>; let Constraints = "$src1 = $dst" in defm MMX_PALIGN : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b>; -let AddedComplexity = 5 in { - -def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)), - (MMX_PALIGNR64rr VR64:$src2, VR64:$src1, - (SHUFFLE_get_palign_imm VR64:$src3))>, - Requires<[HasSSSE3]>; -def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)), - (MMX_PALIGNR64rr VR64:$src2, VR64:$src1, - (SHUFFLE_get_palign_imm VR64:$src3))>, - Requires<[HasSSSE3]>; -def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)), - (MMX_PALIGNR64rr VR64:$src2, VR64:$src1, - (SHUFFLE_get_palign_imm VR64:$src3))>, - Requires<[HasSSSE3]>; -def : Pat<(v8i8 (palign:$src3 VR64:$src1, VR64:$src2)), - (MMX_PALIGNR64rr VR64:$src2, VR64:$src1, - (SHUFFLE_get_palign_imm VR64:$src3))>, - Requires<[HasSSSE3]>; -} - // Logical Instructions -defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>, - MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, 1>; -defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>, - MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por, 1>; -defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>, - MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, 1>; +defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, 1>; +defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por, 1>; +defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, 1>; defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn, 1>; -let Constraints = "$src1 = $dst" in { - def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1), - VR64:$src2)))]>; - def MMX_PANDNrm : MMXI<0xDF, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1), - (load addr:$src2))))]>; -} - // Shift Instructions defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", int_x86_mmx_psrl_w, int_x86_mmx_psrli_w>; @@ -414,12 +309,6 @@ defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", int_x86_mmx_psra_d, int_x86_mmx_psrai_d>; -// Shift up / down and insert zero's. -def : Pat<(v1i64 (X86vshl VR64:$src, (i8 imm:$amt))), - (MMX_PSLLQri VR64:$src, (GetLo32XForm imm:$amt))>; -def : Pat<(v1i64 (X86vshr VR64:$src, (i8 imm:$amt))), - (MMX_PSRLQri VR64:$src, (GetLo32XForm imm:$amt))>; - // Comparison Instructions defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>; defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>; @@ -429,84 +318,7 @@ defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b>; defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>; defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>; -// Conversion Instructions - // -- Unpack Instructions -let Constraints = "$src1 = $dst" in { - // Unpack High Packed Data Instructions - def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpckhbw\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpckhbw\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v8i8 (mmx_unpckh VR64:$src1, - (bc_v8i8 (load_mmx addr:$src2)))))]>; - - def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpckhwd\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpckhwd\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v4i16 (mmx_unpckh VR64:$src1, - (bc_v4i16 (load_mmx addr:$src2)))))]>; - - def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpckhdq\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpckhdq\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v2i32 (mmx_unpckh VR64:$src1, - (bc_v2i32 (load_mmx addr:$src2)))))]>; - - // Unpack Low Packed Data Instructions - def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpcklbw\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpcklbw\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v8i8 (mmx_unpckl VR64:$src1, - (bc_v8i8 (load_mmx addr:$src2)))))]>; - - def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpcklwd\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpcklwd\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v4i16 (mmx_unpckl VR64:$src1, - (bc_v4i16 (load_mmx addr:$src2)))))]>; - - def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpckldq\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpckldq\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v2i32 (mmx_unpckl VR64:$src1, - (bc_v2i32 (load_mmx addr:$src2)))))]>; -} defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw", int_x86_mmx_punpckhbw>; defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd", @@ -526,61 +338,9 @@ defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw>; defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>; // -- Shuffle Instructions -def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2), - "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR64:$dst, - (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>; -def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, - (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2), - "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR64:$dst, - (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)), - (undef)))]>; - -defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", memopv8i8, - int_x86_ssse3_pshuf_b>; -// Shuffle with PALIGN -def : Pat<(v1i64 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), - (MMX_PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; -def : Pat<(v2i32 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), - (MMX_PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; -def : Pat<(v4i16 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), - (MMX_PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; -def : Pat<(v8i8 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), - (MMX_PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; +defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b>; // -- Conversion Instructions -let neverHasSideEffects = 1 in { -def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), - "cvtpi2pd\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in -def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst), - (ins i64mem:$src), - "cvtpi2pd\t{$src, $dst|$dst, $src}", []>; - -def MMX_CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), - "cvtpi2ps\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in -def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst), - (ins i64mem:$src), - "cvtpi2ps\t{$src, $dst|$dst, $src}", []>; - -def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvttpd2pi\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in -def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst), - (ins f128mem:$src), - "cvttpd2pi\t{$src, $dst|$dst, $src}", []>; - -def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvttps2pi\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in -def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), - "cvttps2pi\t{$src, $dst|$dst, $src}", []>; -} // end neverHasSideEffects - -// Intrinsic forms. defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi, f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}", SSEPackedSingle>, TB; @@ -602,43 +362,14 @@ let Constraints = "$src1 = $dst" in { i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}", SSEPackedSingle>, TB; } -// MMX->MMX vector casts. -def : Pat<(v2f64 (sint_to_fp (v2i32 VR64:$src))), - (MMX_CVTPI2PDrr VR64:$src)>, Requires<[HasSSE2]>; -def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))), - (MMX_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>; // Extract / Insert -def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW", - SDTypeProfile<1, 3, [SDTCisVT<0, v4i16>, SDTCisSameAs<0,1>, - SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; - - -def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg, - (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2), - "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (X86pextrw (v4i16 VR64:$src1), - (iPTR imm:$src2)))]>; def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src1, i32i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1, (iPTR imm:$src2)))]>; let Constraints = "$src1 = $dst" in { - def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg, - (outs VR64:$dst), - (ins VR64:$src1, GR32:$src2,i16i8imm:$src3), - "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1), - GR32:$src2,(iPTR imm:$src3))))]>; - def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem, - (outs VR64:$dst), - (ins VR64:$src1, i16mem:$src2, i16i8imm:$src3), - "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR64:$dst, - (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1), - (i32 (anyext (loadi16 addr:$src2))), - (iPTR imm:$src3))))]>; def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, GR32:$src2, i32i8imm:$src3), @@ -655,9 +386,16 @@ let Constraints = "$src1 = $dst" in { (iPTR imm:$src3)))]>; } +// Mask creation +def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src), + "pmovmskb\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, + (int_x86_mmx_pmovmskb VR64:$src))]>; + + // MMX to XMM for vector types def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1, - [SDTCisVT<0, v2i64>, SDTCisVT<1, v1i64>]>>; + [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>; def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)), (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; @@ -665,14 +403,19 @@ def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)), def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))), (v2i64 (MOVQI2PQIrm addr:$src))>; -def : Pat<(v2i64 (MMX_X86movq2dq (v1i64 (bitconvert - (v2i32 (scalar_to_vector (loadi32 addr:$src))))))), +def : Pat<(v2i64 (MMX_X86movq2dq + (x86mmx (scalar_to_vector (loadi32 addr:$src))))), (v2i64 (MOVDI2PDIrm addr:$src))>; -// Mask creation -def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src), - "pmovmskb\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_mmx_pmovmskb VR64:$src))]>; +// Low word of XMM to MMX. +def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1, + [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>; + +def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)), + (x86mmx (MMX_MOVDQ2Qrr VR128:$src))>; + +def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))), + (x86mmx (MMX_MOVQ64rm addr:$src))>; // Misc. let Uses = [EDI] in @@ -684,181 +427,14 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), "maskmovq\t{$mask, $src|$src, $mask}", [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>; -//===----------------------------------------------------------------------===// -// Alias Instructions -//===----------------------------------------------------------------------===// - -// Alias instructions that map zero vector to pxor. -let isReMaterializable = 1, isCodeGenOnly = 1 in { - // FIXME: Change encoding to pseudo. - def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), "", - [(set VR64:$dst, (v2i32 immAllZerosV))]>; - def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins), "", - [(set VR64:$dst, (v2i32 immAllOnesV))]>; -} - -let Predicates = [HasMMX] in { - def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>; - def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>; - def : Pat<(v8i8 immAllZerosV), (MMX_V_SET0)>; -} - -//===----------------------------------------------------------------------===// -// Non-Instruction Patterns -//===----------------------------------------------------------------------===// - -// Store 64-bit integer vector values. -def : Pat<(store (v8i8 VR64:$src), addr:$dst), - (MMX_MOVQ64mr addr:$dst, VR64:$src)>; -def : Pat<(store (v4i16 VR64:$src), addr:$dst), - (MMX_MOVQ64mr addr:$dst, VR64:$src)>; -def : Pat<(store (v2i32 VR64:$src), addr:$dst), - (MMX_MOVQ64mr addr:$dst, VR64:$src)>; -def : Pat<(store (v1i64 VR64:$src), addr:$dst), - (MMX_MOVQ64mr addr:$dst, VR64:$src)>; - -// Bit convert. -def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>; -def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>; -def : Pat<(v8i8 (bitconvert (v4i16 VR64:$src))), (v8i8 VR64:$src)>; -def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>; -def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>; -def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>; -def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>; -def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>; -def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>; -def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>; -def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>; -def : Pat<(v1i64 (bitconvert (v8i8 VR64:$src))), (v1i64 VR64:$src)>; - // 64-bit bit convert. -def : Pat<(v1i64 (bitconvert (i64 GR64:$src))), - (MMX_MOVD64to64rr GR64:$src)>; -def : Pat<(v2i32 (bitconvert (i64 GR64:$src))), - (MMX_MOVD64to64rr GR64:$src)>; -def : Pat<(v4i16 (bitconvert (i64 GR64:$src))), - (MMX_MOVD64to64rr GR64:$src)>; -def : Pat<(v8i8 (bitconvert (i64 GR64:$src))), +def : Pat<(x86mmx (bitconvert (i64 GR64:$src))), (MMX_MOVD64to64rr GR64:$src)>; -def : Pat<(i64 (bitconvert (v1i64 VR64:$src))), - (MMX_MOVD64from64rr VR64:$src)>; -def : Pat<(i64 (bitconvert (v2i32 VR64:$src))), - (MMX_MOVD64from64rr VR64:$src)>; -def : Pat<(i64 (bitconvert (v4i16 VR64:$src))), - (MMX_MOVD64from64rr VR64:$src)>; -def : Pat<(i64 (bitconvert (v8i8 VR64:$src))), +def : Pat<(i64 (bitconvert (x86mmx VR64:$src))), (MMX_MOVD64from64rr VR64:$src)>; -def : Pat<(f64 (bitconvert (v1i64 VR64:$src))), +def : Pat<(f64 (bitconvert (x86mmx VR64:$src))), (MMX_MOVQ2FR64rr VR64:$src)>; -def : Pat<(f64 (bitconvert (v2i32 VR64:$src))), - (MMX_MOVQ2FR64rr VR64:$src)>; -def : Pat<(f64 (bitconvert (v4i16 VR64:$src))), - (MMX_MOVQ2FR64rr VR64:$src)>; -def : Pat<(f64 (bitconvert (v8i8 VR64:$src))), - (MMX_MOVQ2FR64rr VR64:$src)>; -def : Pat<(v1i64 (bitconvert (f64 FR64:$src))), - (MMX_MOVFR642Qrr FR64:$src)>; -def : Pat<(v2i32 (bitconvert (f64 FR64:$src))), - (MMX_MOVFR642Qrr FR64:$src)>; -def : Pat<(v4i16 (bitconvert (f64 FR64:$src))), - (MMX_MOVFR642Qrr FR64:$src)>; -def : Pat<(v8i8 (bitconvert (f64 FR64:$src))), +def : Pat<(x86mmx (bitconvert (f64 FR64:$src))), (MMX_MOVFR642Qrr FR64:$src)>; -let AddedComplexity = 20 in { - def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))), - (MMX_MOVZDI2PDIrm addr:$src)>; -} - -// Clear top half. -let AddedComplexity = 15 in { - def : Pat<(v2i32 (X86vzmovl VR64:$src)), - (MMX_PUNPCKLDQrr VR64:$src, (v2i32 (MMX_V_SET0)))>; -} - -// Patterns to perform canonical versions of vector shuffling. -let AddedComplexity = 10 in { - def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))), - (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>; - def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))), - (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>; - def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))), - (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>; -} - -let AddedComplexity = 10 in { - def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))), - (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>; - def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))), - (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>; - def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))), - (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>; -} -// Some special case PANDN patterns. -// FIXME: Get rid of these. -def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))), - VR64:$src2)), - (MMX_PANDNrr VR64:$src1, VR64:$src2)>; -def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))), - (load addr:$src2))), - (MMX_PANDNrm VR64:$src1, addr:$src2)>; - -// Move MMX to lower 64-bit of XMM -def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v8i8 VR64:$src))))), - (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; -def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v4i16 VR64:$src))))), - (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; -def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v2i32 VR64:$src))))), - (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; -def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v1i64 VR64:$src))))), - (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; - -// Move lower 64-bit of XMM to MMX. -def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))), - (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>; -def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))), - (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>; -def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))), - (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>; - -// Patterns for vector comparisons -def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, VR64:$src2)), - (MMX_PCMPEQBirr VR64:$src1, VR64:$src2)>; -def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPEQBirm VR64:$src1, addr:$src2)>; -def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, VR64:$src2)), - (MMX_PCMPEQWirr VR64:$src1, VR64:$src2)>; -def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPEQWirm VR64:$src1, addr:$src2)>; -def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, VR64:$src2)), - (MMX_PCMPEQDirr VR64:$src1, VR64:$src2)>; -def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPEQDirm VR64:$src1, addr:$src2)>; - -def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, VR64:$src2)), - (MMX_PCMPGTBirr VR64:$src1, VR64:$src2)>; -def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPGTBirm VR64:$src1, addr:$src2)>; -def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, VR64:$src2)), - (MMX_PCMPGTWirr VR64:$src1, VR64:$src2)>; -def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPGTWirm VR64:$src1, addr:$src2)>; -def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, VR64:$src2)), - (MMX_PCMPGTDirr VR64:$src1, VR64:$src2)>; -def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPGTDirm VR64:$src1, addr:$src2)>; - -// CMOV* - Used to implement the SELECT DAG operation. Expanded after -// instruction selection into a branch sequence. -let Uses = [EFLAGS], usesCustomInserter = 1 in { - def CMOV_V1I64 : I<0, Pseudo, - (outs VR64:$dst), (ins VR64:$t, VR64:$f, i8imm:$cond), - "#CMOV_V1I64 PSEUDO!", - [(set VR64:$dst, - (v1i64 (X86cmov VR64:$t, VR64:$f, imm:$cond, - EFLAGS)))]>; -} diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 952796478c..29ee2bfe8b 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -377,9 +377,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break; case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break; case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break; - case X86::MMX_V_SET0: LowerUnaryToTwoAddr(OutMI, X86::MMX_PXORrr); break; - case X86::MMX_V_SETALLONES: - LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDirr); break; case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break; diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 7b34391bf9..959c01ebae 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -792,7 +792,7 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, } // Generic vector registers: VR64 and VR128. -def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64, +def VR64: RegisterClass<"X86", [x86mmx], 64, [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>; def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, |