From 74168ded0399a30fe9cf4d73a28a6045cc685088 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 18 Aug 2019 06:28:06 +0000
Subject: [PATCH] [TargetLowering] Teach computeRegisterProperties to only
 widen v3i16/v3f16 vectors to the next power of 2 type if that's legal.

These were recently made simple types. This restores their behavior to
something like their EVT legalization.

We might be able to fix the code in type legalization where the assert was
failing, but I didn't investigate too much as I had already looked at the
computeRegisterProperties code during the review for v3i16/v3f16.

Most of the test changes restore the X86 codegen to what it looked like
before the recent change. The test case in vec_setcc.ll is a reduced version
of the reproducer from the fuzzer.

Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=16490

llvm-svn: 369205
---
 llvm/lib/CodeGen/TargetLoweringBase.cpp |  34 +++++++---
 llvm/test/CodeGen/X86/promote-vec3.ll   | 116 ++++++++++++++++++++++----------
 llvm/test/CodeGen/X86/vec_cast.ll       |  46 ++++++++++---
 llvm/test/CodeGen/X86/vec_setcc.ll      |  45 +++++++++++++
 llvm/test/CodeGen/X86/widen_load-2.ll   |  20 +++---
 5 files changed, 195 insertions(+), 66 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 982e318..b9d6738 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1282,21 +1282,33 @@ void TargetLoweringBase::computeRegisterProperties(
       LLVM_FALLTHROUGH;
 
     case TypeWidenVector:
-      // Try to widen the vector.
-      for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
-        MVT SVT = (MVT::SimpleValueType) nVT;
-        if (SVT.getVectorElementType() == EltVT
-            && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
-          TransformToType[i] = SVT;
-          RegisterTypeForVT[i] = SVT;
-          NumRegistersForVT[i] = 1;
+      if (isPowerOf2_32(NElts)) {
+        // Try to widen the vector.
+        for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+          MVT SVT = (MVT::SimpleValueType) nVT;
+          if (SVT.getVectorElementType() == EltVT
+              && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
+            TransformToType[i] = SVT;
+            RegisterTypeForVT[i] = SVT;
+            NumRegistersForVT[i] = 1;
+            ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+            IsLegalWiderType = true;
+            break;
+          }
+        }
+        if (IsLegalWiderType)
+          break;
+      } else {
+        // Only widen to the next power of 2 to keep consistency with EVT.
+ MVT NVT = VT.getPow2VectorType(); + if (isTypeLegal(NVT)) { + TransformToType[i] = NVT; ValueTypeActions.setTypeAction(VT, TypeWidenVector); - IsLegalWiderType = true; + RegisterTypeForVT[i] = NVT; + NumRegistersForVT[i] = 1; break; } } - if (IsLegalWiderType) - break; LLVM_FALLTHROUGH; case TypeSplitVector: diff --git a/llvm/test/CodeGen/X86/promote-vec3.ll b/llvm/test/CodeGen/X86/promote-vec3.ll index 27d3a2f..61df546 100644 --- a/llvm/test/CodeGen/X86/promote-vec3.ll +++ b/llvm/test/CodeGen/X86/promote-vec3.ll @@ -8,36 +8,58 @@ define <3 x i16> @zext_i8(<3 x i8>) { ; SSE3-LABEL: zext_i8: ; SSE3: # %bb.0: -; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; SSE3-NEXT: movd %edx, %xmm0 -; SSE3-NEXT: pinsrw $1, %ecx, %xmm0 -; SSE3-NEXT: pinsrw $2, %eax, %xmm0 +; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: movd %eax, %xmm0 +; SSE3-NEXT: pinsrw $1, %edx, %xmm0 +; SSE3-NEXT: pinsrw $2, %ecx, %xmm0 +; SSE3-NEXT: movd %xmm0, %eax +; SSE3-NEXT: # kill: def $ax killed $ax killed $eax +; SSE3-NEXT: # kill: def $dx killed $dx killed $edx +; SSE3-NEXT: # kill: def $cx killed $cx killed $ecx ; SSE3-NEXT: retl ; ; SSE41-LABEL: zext_i8: ; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0 +; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE41-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0 ; SSE41-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0 -; SSE41-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: pextrw $1, %xmm0, %edx +; SSE41-NEXT: pextrw $2, %xmm0, %ecx +; SSE41-NEXT: # kill: def $ax killed $ax killed $eax +; SSE41-NEXT: # kill: def $dx killed $dx killed $edx +; SSE41-NEXT: # kill: def $cx killed $cx killed $ecx ; SSE41-NEXT: retl ; ; AVX-32-LABEL: zext_i8: ; AVX-32: # %bb.0: -; AVX-32-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX-32-NEXT: vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; AVX-32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; AVX-32-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX-32-NEXT: vmovd %xmm0, %eax +; AVX-32-NEXT: vpextrw $1, %xmm0, %edx +; AVX-32-NEXT: vpextrw $2, %xmm0, %ecx +; AVX-32-NEXT: # kill: def $ax killed $ax killed $eax +; AVX-32-NEXT: # kill: def $dx killed $dx killed $edx +; AVX-32-NEXT: # kill: def $cx killed $cx killed $ecx ; AVX-32-NEXT: retl ; ; AVX-64-LABEL: zext_i8: ; AVX-64: # %bb.0: -; AVX-64-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX-64-NEXT: vpinsrb $0, %edi, %xmm0, %xmm0 -; AVX-64-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0 -; AVX-64-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0 +; AVX-64-NEXT: vmovd %edi, %xmm0 +; AVX-64-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 +; AVX-64-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 +; AVX-64-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX-64-NEXT: vmovd %xmm0, %eax +; AVX-64-NEXT: vpextrw $1, %xmm0, %edx +; AVX-64-NEXT: vpextrw $2, %xmm0, %ecx +; AVX-64-NEXT: # kill: def $ax killed $ax killed $eax +; AVX-64-NEXT: # kill: def $dx killed $dx killed $edx +; AVX-64-NEXT: # kill: 
def $cx killed $cx killed $ecx ; AVX-64-NEXT: retq %2 = zext <3 x i8> %0 to <3 x i16> ret <3 x i16> %2 @@ -46,42 +68,64 @@ define <3 x i16> @zext_i8(<3 x i8>) { define <3 x i16> @sext_i8(<3 x i8>) { ; SSE3-LABEL: sext_i8: ; SSE3: # %bb.0: -; SSE3-NEXT: movsbl {{[0-9]+}}(%esp), %eax -; SSE3-NEXT: movd %eax, %xmm0 -; SSE3-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: shll $8, %eax +; SSE3-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SSE3-NEXT: shll $8, %ecx +; SSE3-NEXT: movd %ecx, %xmm0 ; SSE3-NEXT: pinsrw $1, %eax, %xmm0 -; SSE3-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: shll $8, %eax ; SSE3-NEXT: pinsrw $2, %eax, %xmm0 +; SSE3-NEXT: psraw $8, %xmm0 +; SSE3-NEXT: movd %xmm0, %eax +; SSE3-NEXT: pextrw $1, %xmm0, %edx +; SSE3-NEXT: pextrw $2, %xmm0, %ecx +; SSE3-NEXT: # kill: def $ax killed $ax killed $eax +; SSE3-NEXT: # kill: def $dx killed $dx killed $edx +; SSE3-NEXT: # kill: def $cx killed $cx killed $ecx ; SSE3-NEXT: retl ; ; SSE41-LABEL: sext_i8: ; SSE41: # %bb.0: -; SSE41-NEXT: movsbl {{[0-9]+}}(%esp), %eax -; SSE41-NEXT: movd %eax, %xmm0 -; SSE41-NEXT: movsbl {{[0-9]+}}(%esp), %eax -; SSE41-NEXT: pinsrw $1, %eax, %xmm0 -; SSE41-NEXT: movsbl {{[0-9]+}}(%esp), %eax -; SSE41-NEXT: pinsrw $2, %eax, %xmm0 +; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE41-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0 +; SSE41-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0 +; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 +; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: pextrw $1, %xmm0, %edx +; SSE41-NEXT: pextrw $2, %xmm0, %ecx +; SSE41-NEXT: # kill: def $ax killed $ax killed $eax +; SSE41-NEXT: # kill: def $dx killed $dx killed $edx +; SSE41-NEXT: # kill: def $cx killed $cx killed $ecx ; SSE41-NEXT: retl ; ; AVX-32-LABEL: sext_i8: ; AVX-32: # %bb.0: -; AVX-32-NEXT: movsbl {{[0-9]+}}(%esp), %eax -; AVX-32-NEXT: vmovd %eax, %xmm0 -; AVX-32-NEXT: movsbl {{[0-9]+}}(%esp), %eax -; AVX-32-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 -; AVX-32-NEXT: movsbl {{[0-9]+}}(%esp), %eax -; AVX-32-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 +; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; AVX-32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; AVX-32-NEXT: vpmovsxbw %xmm0, %xmm0 +; AVX-32-NEXT: vmovd %xmm0, %eax +; AVX-32-NEXT: vpextrw $1, %xmm0, %edx +; AVX-32-NEXT: vpextrw $2, %xmm0, %ecx +; AVX-32-NEXT: # kill: def $ax killed $ax killed $eax +; AVX-32-NEXT: # kill: def $dx killed $dx killed $edx +; AVX-32-NEXT: # kill: def $cx killed $cx killed $ecx ; AVX-32-NEXT: retl ; ; AVX-64-LABEL: sext_i8: ; AVX-64: # %bb.0: -; AVX-64-NEXT: movsbl %sil, %eax -; AVX-64-NEXT: movsbl %dil, %ecx -; AVX-64-NEXT: vmovd %ecx, %xmm0 -; AVX-64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 -; AVX-64-NEXT: movsbl %dl, %eax -; AVX-64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 +; AVX-64-NEXT: vmovd %edi, %xmm0 +; AVX-64-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 +; AVX-64-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 +; AVX-64-NEXT: vpmovsxbw %xmm0, %xmm0 +; AVX-64-NEXT: vmovd %xmm0, %eax +; AVX-64-NEXT: vpextrw $1, %xmm0, %edx +; AVX-64-NEXT: vpextrw $2, %xmm0, %ecx +; AVX-64-NEXT: # kill: def $ax killed $ax killed $eax +; AVX-64-NEXT: # kill: def $dx killed $dx killed $edx +; AVX-64-NEXT: # kill: def $cx killed $cx killed $ecx ; AVX-64-NEXT: retq %2 = sext <3 x i8> %0 to <3 x i16> ret <3 x i16> %2 diff --git a/llvm/test/CodeGen/X86/vec_cast.ll b/llvm/test/CodeGen/X86/vec_cast.ll index 49ccde25..6e9a167 100644 --- 
a/llvm/test/CodeGen/X86/vec_cast.ll +++ b/llvm/test/CodeGen/X86/vec_cast.ll @@ -27,13 +27,22 @@ define <8 x i32> @a(<8 x i16> %a) nounwind { define <3 x i32> @b(<3 x i16> %a) nounwind { ; CHECK-LIN-LABEL: b: ; CHECK-LIN: # %bb.0: -; CHECK-LIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; CHECK-LIN-NEXT: pxor %xmm0, %xmm0 +; CHECK-LIN-NEXT: pinsrw $1, %edi, %xmm0 +; CHECK-LIN-NEXT: pinsrw $3, %esi, %xmm0 +; CHECK-LIN-NEXT: pinsrw $5, %edx, %xmm0 ; CHECK-LIN-NEXT: psrad $16, %xmm0 ; CHECK-LIN-NEXT: retq ; ; CHECK-WIN-LABEL: b: ; CHECK-WIN: # %bb.0: -; CHECK-WIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; CHECK-WIN-NEXT: # kill: def $r8w killed $r8w def $r8d +; CHECK-WIN-NEXT: # kill: def $dx killed $dx def $edx +; CHECK-WIN-NEXT: # kill: def $cx killed $cx def $ecx +; CHECK-WIN-NEXT: pxor %xmm0, %xmm0 +; CHECK-WIN-NEXT: pinsrw $1, %ecx, %xmm0 +; CHECK-WIN-NEXT: pinsrw $3, %edx, %xmm0 +; CHECK-WIN-NEXT: pinsrw $5, %r8d, %xmm0 ; CHECK-WIN-NEXT: psrad $16, %xmm0 ; CHECK-WIN-NEXT: retq %c = sext <3 x i16> %a to <3 x i32> @@ -78,15 +87,21 @@ define <8 x i32> @d(<8 x i16> %a) nounwind { define <3 x i32> @e(<3 x i16> %a) nounwind { ; CHECK-LIN-LABEL: e: ; CHECK-LIN: # %bb.0: -; CHECK-LIN-NEXT: pxor %xmm1, %xmm1 -; CHECK-LIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK-LIN-NEXT: pxor %xmm0, %xmm0 +; CHECK-LIN-NEXT: pinsrw $0, %edi, %xmm0 +; CHECK-LIN-NEXT: pinsrw $2, %esi, %xmm0 +; CHECK-LIN-NEXT: pinsrw $4, %edx, %xmm0 ; CHECK-LIN-NEXT: retq ; ; CHECK-WIN-LABEL: e: ; CHECK-WIN: # %bb.0: -; CHECK-WIN-NEXT: movdqa (%rcx), %xmm0 -; CHECK-WIN-NEXT: pxor %xmm1, %xmm1 -; CHECK-WIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK-WIN-NEXT: # kill: def $r8w killed $r8w def $r8d +; CHECK-WIN-NEXT: # kill: def $dx killed $dx def $edx +; CHECK-WIN-NEXT: # kill: def $cx killed $cx def $ecx +; CHECK-WIN-NEXT: pxor %xmm0, %xmm0 +; CHECK-WIN-NEXT: pinsrw $0, %ecx, %xmm0 +; CHECK-WIN-NEXT: pinsrw $2, %edx, %xmm0 +; CHECK-WIN-NEXT: pinsrw $4, %r8d, %xmm0 ; CHECK-WIN-NEXT: retq %c = zext <3 x i16> %a to <3 x i32> ret <3 x i32> %c @@ -131,14 +146,23 @@ define <8 x i16> @g(<8 x i32> %a) nounwind { define <3 x i16> @h(<3 x i32> %a) nounwind { ; CHECK-LIN-LABEL: h: ; CHECK-LIN: # %bb.0: -; CHECK-LIN-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; CHECK-LIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; CHECK-LIN-NEXT: movd %xmm0, %eax +; CHECK-LIN-NEXT: pextrw $2, %xmm0, %edx +; CHECK-LIN-NEXT: pextrw $4, %xmm0, %ecx +; CHECK-LIN-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-LIN-NEXT: # kill: def $dx killed $dx killed $edx +; CHECK-LIN-NEXT: # kill: def $cx killed $cx killed $ecx ; CHECK-LIN-NEXT: retq ; ; CHECK-WIN-LABEL: h: ; CHECK-WIN: # %bb.0: -; CHECK-WIN-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7] -; CHECK-WIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; CHECK-WIN-NEXT: movdqa (%rcx), %xmm0 +; CHECK-WIN-NEXT: movd %xmm0, %eax +; CHECK-WIN-NEXT: pextrw $2, %xmm0, %edx +; CHECK-WIN-NEXT: pextrw $4, %xmm0, %ecx +; CHECK-WIN-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-WIN-NEXT: # kill: def $dx killed $dx killed $edx +; CHECK-WIN-NEXT: # kill: def $cx killed $cx killed $ecx ; CHECK-WIN-NEXT: retq %c = trunc <3 x i32> %a to <3 x i16> ret <3 x i16> %c diff --git a/llvm/test/CodeGen/X86/vec_setcc.ll b/llvm/test/CodeGen/X86/vec_setcc.ll index e9494d8..127222b 100644 --- a/llvm/test/CodeGen/X86/vec_setcc.ll +++ 
b/llvm/test/CodeGen/X86/vec_setcc.ll @@ -197,3 +197,48 @@ define <2 x i64> @test_setcc_constfold_vi64(<2 x i64> %l, <2 x i64> %r) { %res = or <2 x i64> %mask1, %mask2 ret <2 x i64> %res } + +; This asserted in type legalization for v3i1 setcc after v3i16 was made +; a simple value type. +define <3 x i1> @test_setcc_v3i1_v3i16(<3 x i16>* %a) nounwind { +; SSE2-LABEL: test_setcc_v3i1_v3i16: +; SSE2: # %bb.0: +; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: pcmpeqw %xmm0, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_setcc_v3i1_v3i16: +; SSE41: # %bb.0: +; SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pcmpeqw %xmm0, %xmm1 +; SSE41-NEXT: pextrb $0, %xmm1, %eax +; SSE41-NEXT: pextrb $2, %xmm1, %edx +; SSE41-NEXT: pextrb $4, %xmm1, %ecx +; SSE41-NEXT: # kill: def $al killed $al killed $eax +; SSE41-NEXT: # kill: def $dl killed $dl killed $edx +; SSE41-NEXT: # kill: def $cl killed $cl killed $ecx +; SSE41-NEXT: retq +; +; AVX-LABEL: test_setcc_v3i1_v3i16: +; AVX: # %bb.0: +; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vpextrb $2, %xmm0, %edx +; AVX-NEXT: vpextrb $4, %xmm0, %ecx +; AVX-NEXT: # kill: def $al killed $al killed $eax +; AVX-NEXT: # kill: def $dl killed $dl killed $edx +; AVX-NEXT: # kill: def $cl killed $cl killed $ecx +; AVX-NEXT: retq + %b = load <3 x i16>, <3 x i16>* %a + %cmp = icmp eq <3 x i16> %b, + ret <3 x i1> %cmp +} diff --git a/llvm/test/CodeGen/X86/widen_load-2.ll b/llvm/test/CodeGen/X86/widen_load-2.ll index a6195af..a816dd0 100644 --- a/llvm/test/CodeGen/X86/widen_load-2.ll +++ b/llvm/test/CodeGen/X86/widen_load-2.ll @@ -151,19 +151,23 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movdqa (%edx), %xmm0 -; X86-NEXT: paddw (%ecx), %xmm0 -; X86-NEXT: pextrw $2, %xmm0, 4(%eax) -; X86-NEXT: movd %xmm0, (%eax) +; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: pinsrw $2, 4(%edx), %xmm0 +; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-NEXT: pinsrw $2, 4(%ecx), %xmm1 +; X86-NEXT: paddw %xmm0, %xmm1 +; X86-NEXT: pextrw $2, %xmm1, 4(%eax) +; X86-NEXT: movd %xmm1, (%eax) ; X86-NEXT: retl $4 ; ; X64-LABEL: add3i16: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movdqa (%rsi), %xmm0 -; X64-NEXT: paddw (%rdx), %xmm0 -; X64-NEXT: pextrw $2, %xmm0, 4(%rdi) -; X64-NEXT: movd %xmm0, (%rdi) +; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; X64-NEXT: paddw %xmm0, %xmm1 +; X64-NEXT: pextrw $2, %xmm1, 4(%rdi) +; X64-NEXT: movd %xmm1, (%rdi) ; X64-NEXT: retq %a = load %i16vec3, %i16vec3* %ap, align 16 %b = load %i16vec3, %i16vec3* %bp, align 16 -- 2.7.4
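
For readers who want the behavior change in isolation: after this patch, a
non-power-of-2 vector such as v3i16 is widened only to its power-of-2-padded
type (v4i16) when that type is legal, instead of scanning every larger legal
vector type, which keeps MVT legalization consistent with the old EVT path.
Below is a minimal standalone C++ sketch of that decision, not LLVM code;
MockVT, nextPowerOf2, widenVector, and the isLegal callback are invented here
purely for illustration.

    #include <cstdio>
    #include <functional>

    struct MockVT {
      unsigned NumElts; // number of vector elements, e.g. 3 for v3i16
      unsigned EltBits; // bits per element, e.g. 16
    };

    static bool isPow2(unsigned N) { return N && (N & (N - 1)) == 0; }

    static unsigned nextPowerOf2(unsigned N) {
      unsigned P = 1;
      while (P < N)
        P <<= 1;
      return P;
    }

    // Mirrors the patched policy: a non-power-of-2 vector only tries its
    // power-of-2-padded type; if that type is not legal it is left for the
    // later split/scalarize handling instead of being widened further.
    static MockVT widenVector(MockVT VT,
                              const std::function<bool(MockVT)> &isLegal) {
      if (!isPow2(VT.NumElts)) {
        MockVT Pow2{nextPowerOf2(VT.NumElts), VT.EltBits};
        if (isLegal(Pow2))
          return Pow2; // e.g. v3i16 -> v4i16
        return VT;     // fall through to splitting/scalarizing (not modeled)
      }
      return VT; // power-of-2 vectors keep the old "scan larger types" search
    }

    int main() {
      // Toy legality rule: power-of-2 element count, at most 128 bits total.
      auto isLegal = [](MockVT T) {
        return isPow2(T.NumElts) && T.NumElts * T.EltBits <= 128;
      };
      MockVT V3I16{3, 16};
      MockVT Widened = widenVector(V3I16, isLegal);
      std::printf("v3i16 widens to v%ui16\n", Widened.NumElts); // v4i16
      return 0;
    }

In the real patch the power-of-2 case still searches the larger MVTs up to
MVT::LAST_VECTOR_VALUETYPE as before; only the non-power-of-2 path changes,
which is what the updated X86 tests above reflect.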