diff options
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 6 | ||||
-rw-r--r-- | test/CodeGen/X86/vec_ins_extract-1.ll | 25 |
2 files changed, 30 insertions, 1 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index ab2c832300..16f17e56f4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2597,9 +2597,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
     // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
     // operations are lowered to scalars.
     if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+      // If the indices are the same, return the inserted element.
       if (N1.getOperand(2) == N2)
         return N1.getOperand(1);
-      else
+      // If the indices are known different, extract the element from
+      // the original vector.
+      else if (isa<ConstantSDNode>(N1.getOperand(2)) &&
+               isa<ConstantSDNode>(N2))
         return getNode(ISD::EXTRACT_VECTOR_ELT, VT, N1.getOperand(0), N2);
     }
     break;
diff --git a/test/CodeGen/X86/vec_ins_extract-1.ll b/test/CodeGen/X86/vec_ins_extract-1.ll
new file mode 100644
index 0000000000..44ae039384
--- /dev/null
+++ b/test/CodeGen/X86/vec_ins_extract-1.ll
@@ -0,0 +1,25 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep {(%esp,%eax,4)} | count 4
+
+; Inserts and extracts with variable indices must be lowered
+; to memory accesses.
+
+define i32 @t0(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
+  %t13 = insertelement <4 x i32> %t8, i32 76, i32 %t7
+  %t9 = extractelement <4 x i32> %t13, i32 0
+  ret i32 %t9
+}
+define i32 @t1(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
+  %t13 = insertelement <4 x i32> %t8, i32 76, i32 0
+  %t9 = extractelement <4 x i32> %t13, i32 %t7
+  ret i32 %t9
+}
+define <4 x i32> @t2(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
+  %t9 = extractelement <4 x i32> %t8, i32 %t7
+  %t13 = insertelement <4 x i32> %t8, i32 %t9, i32 0
+  ret <4 x i32> %t13
+}
+define <4 x i32> @t3(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
+  %t9 = extractelement <4 x i32> %t8, i32 0
+  %t13 = insertelement <4 x i32> %t8, i32 %t9, i32 %t7
+  ret <4 x i32> %t13
+}