diff options
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 68 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/compare.ll | 85 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll | 149 |
3 files changed, 288 insertions, 14 deletions
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f50e5e7e76..9061ae9f76 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -96,7 +96,7 @@ private: ArrayRef<int> Mask) const; bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, - SDLoc dl); + bool &NeedInvert, SDLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, @@ -1597,18 +1597,30 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, /// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and /// condition code CC on the current target. +/// /// If the SETCC has been legalized using AND / OR, then the legalized node -/// will be stored in LHS. RHS and CC will be set to SDValue(). +/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert +/// will be set to false. +/// /// If the SETCC has been legalized by using getSetCCSwappedOperands(), -/// then the values of LHS and RHS will be swapped and CC will be set to the -/// new condition. +/// then the values of LHS and RHS will be swapped, CC will be set to the +/// new condition, and NeedInvert will be set to false. +/// +/// If the SETCC has been legalized using the inverse condcode, then LHS and +/// RHS will be unchanged, CC will be set to the inverted condcode, and NeedInvert +/// will be set to true. The caller must invert the result of the SETCC with +/// SelectionDAG::getNOT() or take equivalent action to swap the effect of a +/// true/false result. +/// /// \returns true if the SetCC has been legalized, false if it hasn't. 
bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, + bool &NeedInvert, SDLoc dl) { MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); + NeedInvert = false; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: @@ -1661,11 +1673,21 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, case ISD::SETGT: case ISD::SETGE: case ISD::SETLT: - case ISD::SETNE: - case ISD::SETEQ: // We only support using the inverted operation, which is computed above // and not a different manner of supporting expanding these cases. llvm_unreachable("Don't know how to expand this condition!"); + case ISD::SETNE: + case ISD::SETEQ: + // Try inverting the result of the inverse condition. + InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ; + if (TLI.isCondCodeLegal(InvCC, OpVT)) { + CC = DAG.getCondCode(InvCC); + NeedInvert = true; + return true; + } + // If inverting the condition didn't work then we have no means to expand + // the condition. + llvm_unreachable("Don't know how to expand this condition!"); } SDValue SetCC1, SetCC2; @@ -2783,6 +2805,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector<SDValue, 8> Results; SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; + bool NeedInvert; switch (Node->getOpcode()) { case ISD::CTPOP: case ISD::CTLZ: @@ -3673,15 +3696,20 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp2 = Node->getOperand(1); Tmp3 = Node->getOperand(2); bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, - Tmp3, dl); + Tmp3, NeedInvert, dl); if (Legalized) { - // If we exapanded the SETCC by swapping LHS and RHS, create a new SETCC - // node. + // If we expanded the SETCC by swapping LHS and RHS, or by inverting the + // condition code, create a new SETCC node. 
if (Tmp3.getNode()) Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3); + // If we expanded the SETCC by inverting the condition code, then wrap + // the existing SETCC in a NOT to restore the intended condition. + if (NeedInvert) + Tmp1 = DAG.getNOT(dl, Tmp1, Tmp1->getValueType(0)); + Results.push_back(Tmp1); break; } @@ -3736,11 +3764,18 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (!Legalized) { Legalized = LegalizeSetCCCondCode( - getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, dl); + getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert, + dl); assert(Legalized && "Can't legalize SELECT_CC with legal condition!"); - // If we exapanded the SETCC by swapping LHS and RHS, create a new - // SELECT_CC node. + + // If we expanded the SETCC by inverting the condition code, then swap + // the True/False operands to match. + if (NeedInvert) + std::swap(Tmp3, Tmp4); + + // If we expanded the SETCC by swapping LHS and RHS, or by inverting the + // condition code, create a new SELECT_CC node. if (CC.getNode()) { Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Tmp4, CC); @@ -3761,11 +3796,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp4 = Node->getOperand(1); // CC bool Legalized = LegalizeSetCCCondCode(getSetCCResultType( - Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); + Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl); (void)Legalized; assert(Legalized && "Can't legalize BR_CC with legal condition!"); - // If we exapanded the SETCC by swapping LHS and RHS, create a new BR_CC + // If we expanded the SETCC by inverting the condition code, then wrap + // the existing SETCC in a NOT to restore the intended condition. + if (NeedInvert) + Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0)); + + // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC // node. 
if (Tmp4.getNode()) { Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, diff --git a/test/CodeGen/Mips/msa/compare.ll b/test/CodeGen/Mips/msa/compare.ll index 36569a984b..6408d7ba09 100644 --- a/test/CodeGen/Mips/msa/compare.ll +++ b/test/CodeGen/Mips/msa/compare.ll @@ -341,6 +341,91 @@ define void @clt_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { ; CHECK: .size clt_u_v2i64 } +; There is no != comparison, but test it anyway since we've had legalizer +; issues in this area. +define void @cne_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: cne_v16i8: + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ne <16 x i8> %1, %2 + %4 = sext <16 x i1> %3 to <16 x i8> + ; CHECK-DAG: ceq.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + ; CHECK-DAG: xori.b [[R3]], [[R3]], 255 + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size cne_v16i8 +} + +; There is no != comparison, but test it anyway since we've had legalizer +; issues in this area. +define void @cne_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: cne_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ne <8 x i16> %1, %2 + %4 = sext <8 x i1> %3 to <8 x i16> + ; CHECK-DAG: ceq.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but that's an optimisation issue + ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1 + ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size cne_v8i16 +} + +; There is no != comparison, but test it anyway since we've had legalizer +; issues in this area. 
+define void @cne_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: cne_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ne <4 x i32> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: ceq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but that's an optimisation issue + ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1 + ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size cne_v4i32 +} + +; There is no != comparison, but test it anyway since we've had legalizer +; issues in this area. +define void @cne_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: cne_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ne <2 x i64> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: ceq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but that's an optimisation issue + ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1 + ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size cne_v2i64 +} + define void @ceqi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { ; CHECK: ceqi_v16i8: diff --git a/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll b/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll new file mode 100644 index 0000000000..1a03e55d9d --- /dev/null +++ b/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll @@ -0,0 +1,149 @@ +; RUN: llc -march=mips < %s +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s +; RUN: llc -march=mipsel < %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s + +; This test originally failed for MSA with a +; "Don't know how to expand this condition!" 
unreachable. +; It should at least successfully build. + +define void @autogen_SD3861334421(i8*, i32*, i64*, i32, i64, i8) { +BB: + %A4 = alloca <2 x i32> + %A3 = alloca <2 x double> + %A2 = alloca i64 + %A1 = alloca i64 + %A = alloca double + %L = load i8* %0 + store i8 -101, i8* %0 + %E = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 0 + %Shuff = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 undef, i32 1> + %I = insertelement <8 x i64> zeroinitializer, i64 %4, i32 5 + %B = and i64 116376, 57247 + %FC = uitofp i8 7 to double + %Sl = select i1 false, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %L5 = load i8* %0 + store i8 %L, i8* %0 + %E6 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 3 + %Shuff7 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 0> + %I8 = insertelement <8 x i8> %Sl, i8 7, i32 4 + %B9 = or <8 x i64> zeroinitializer, zeroinitializer + %Sl10 = select i1 false, i64 116376, i64 380809 + %Cmp = icmp sgt i32 394647, 17081 + br label %CF + +CF: ; preds = %CF, %BB + %L11 = load i8* %0 + store i8 -87, i8* %0 + %E12 = extractelement <4 x i64> zeroinitializer, i32 0 + %Shuff13 = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 7, i32 9, i32 11, i32 13, i32 undef, i32 1, i32 3, i32 5> + %I14 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 1 + %B15 = srem i64 %Sl10, 380809 + %FC16 = sitofp i64 57247 to float + %Sl17 = select i1 false, double 0x87A9374869A78EC6, double 0.000000e+00 + %Cmp18 = icmp uge i8 %L, %5 + br i1 %Cmp18, label %CF, label %CF80 + +CF80: ; preds = %CF80, %CF88, %CF + %L19 = load i8* %0 + store i8 -101, i8* %0 + %E20 = extractelement <4 x i64> zeroinitializer, i32 0 + %Shuff21 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff7, <4 
x i32> <i32 7, i32 1, i32 3, i32 5> + %I22 = insertelement <4 x i64> zeroinitializer, i64 127438, i32 1 + %B23 = fdiv double %Sl17, 0.000000e+00 + %Sl24 = select i1 %Cmp18, i32 420510, i32 492085 + %Cmp25 = icmp ugt i1 %Cmp18, false + br i1 %Cmp25, label %CF80, label %CF83 + +CF83: ; preds = %CF83, %CF80 + %L26 = load i8* %0 + store i8 -87, i8* %0 + %E27 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 0 + %Shuff28 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 7, i32 1, i32 3, i32 5> + %I29 = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 492085, i32 1 + %B30 = lshr <8 x i8> %I8, %I8 + %FC31 = sitofp <4 x i32> %Shuff28 to <4 x double> + %Sl32 = select i1 false, <8 x i8> %I8, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %Cmp33 = icmp eq i64 %B, 116376 + br i1 %Cmp33, label %CF83, label %CF88 + +CF88: ; preds = %CF83 + %L34 = load i8* %0 + store i8 -87, i8* %0 + %E35 = extractelement <8 x i64> %Shuff, i32 7 + %Shuff36 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %Shuff28, <4 x i32> <i32 2, i32 undef, i32 undef, i32 0> + %I37 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 0 + %B38 = xor <8 x i64> %B9, %B9 + %ZE = zext i32 0 to i64 + %Sl39 = select i1 %Cmp33, i8 %L11, i8 %L5 + %Cmp40 = icmp sgt i1 %Cmp, false + br i1 %Cmp40, label %CF80, label %CF81 + +CF81: ; preds = %CF81, %CF85, %CF87, %CF88 + %L41 = load i8* %0 + store i8 %L34, i8* %0 + %E42 = extractelement <8 x i64> %Shuff13, i32 6 + %Shuff43 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 7> + %I44 = insertelement <4 x i64> zeroinitializer, i64 116376, i32 3 + %B45 = fsub float %FC16, 0x3AC86DCC40000000 + %Tr = trunc <4 x i64> %I14 to <4 x i32> + %Sl46 = select i1 false, <8 x i64> %B38, <8 x i64> zeroinitializer + %Cmp47 = icmp sgt i1 %Cmp18, %Cmp18 + br i1 %Cmp47, label %CF81, 
label %CF85 + +CF85: ; preds = %CF81 + %L48 = load i8* %0 + store i8 -101, i8* %0 + %E49 = extractelement <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i32 2 + %Shuff50 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 5, i32 7, i32 1, i32 3> + %I51 = insertelement <4 x i64> zeroinitializer, i64 %E20, i32 3 + %B52 = or i32 336955, %Sl24 + %FC53 = uitofp i8 %L48 to double + %Sl54 = select i1 %Cmp47, i32 %3, i32 %Sl24 + %Cmp55 = icmp ne <8 x i64> %Shuff13, zeroinitializer + %L56 = load i8* %0 + store i8 %L11, i8* %0 + %E57 = extractelement <4 x i64> %Shuff21, i32 1 + %Shuff58 = shufflevector <8 x i64> %Shuff, <8 x i64> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 undef, i32 10, i32 12, i32 undef, i32 0, i32 2> + %I59 = insertelement <4 x i64> zeroinitializer, i64 %E42, i32 2 + %B60 = udiv <8 x i8> %Sl, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %Tr61 = trunc i8 49 to i1 + br i1 %Tr61, label %CF81, label %CF84 + +CF84: ; preds = %CF84, %CF85 + %Sl62 = select i1 false, i8 %L, i8 %L48 + %Cmp63 = icmp ne <8 x i64> %I, zeroinitializer + %L64 = load i8* %0 + store i8 %5, i8* %0 + %E65 = extractelement <8 x i1> %Cmp55, i32 0 + br i1 %E65, label %CF84, label %CF87 + +CF87: ; preds = %CF84 + %Shuff66 = shufflevector <4 x i64> %Shuff21, <4 x i64> %I14, <4 x i32> <i32 3, i32 undef, i32 7, i32 1> + %I67 = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 %Sl54, i32 1 + %B68 = frem double %B23, %Sl17 + %ZE69 = zext <8 x i8> %Sl32 to <8 x i64> + %Sl70 = select i1 %Tr61, i64 %E20, i64 %E12 + %Cmp71 = icmp slt <8 x i64> %I, %Shuff + %L72 = load i8* %0 + store i8 %L72, i8* %0 + %E73 = extractelement <8 x i1> %Cmp55, i32 6 + br i1 %E73, label %CF81, label %CF82 + +CF82: ; preds = %CF82, %CF87 + %Shuff74 = shufflevector <4 x i32> %I67, <4 x i32> %I29, <4 x i32> <i32 1, i32 3, i32 undef, i32 7> + %I75 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 3 + %B76 = fsub double 0.000000e+00, %FC53 + 
%Tr77 = trunc i32 %E to i8 + %Sl78 = select i1 %Cmp18, i64* %A2, i64* %2 + %Cmp79 = icmp eq i32 394647, 492085 + br i1 %Cmp79, label %CF82, label %CF86 + +CF86: ; preds = %CF82 + store i64 %Sl70, i64* %Sl78 + store i64 %E57, i64* %Sl78 + store i64 %Sl70, i64* %Sl78 + store i64 %B, i64* %Sl78 + store i64 %Sl10, i64* %Sl78 + ret void +} |