From da73a09fcdeca4c093cdb58c3965cfacb42383f4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 3 Dec 2016 05:35:38 +0000 Subject: [PATCH] [X86] Add test cases demonstrating where we incorrectly commute VEX VPMADDUSBW due to a bug introduced in r285515. I believe this is the cause of PR31241. llvm-svn: 288577 --- llvm/test/CodeGen/X86/avx-intrinsics-x86.ll | 28 ++++++++++++++++--- llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll | 42 ++++++++++++++++++++-------- 2 files changed, 54 insertions(+), 16 deletions(-) diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index d387d12..c0bc259 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -2540,6 +2540,26 @@ define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) { declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone +; Make sure we don't commute this operation. +define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128_load_op0(<16 x i8>* %ptr, <16 x i8> %a1) { +; AVX-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0: +; AVX: ## BB#0: +; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; AVX-NEXT: vpmaddubsw (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x04,0x00] +; AVX-NEXT: retl ## encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; AVX512VL-NEXT: vmovdqu8 (%eax), %xmm1 ## encoding: [0x62,0xf1,0x7f,0x08,0x6f,0x08] +; AVX512VL-NEXT: vpmaddubsw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0x04,0xc0] +; AVX512VL-NEXT: retl ## encoding: [0xc3] + %a0 = load <16 x i8>, <16 x i8>* %ptr + %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} + + define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) { ; AVX-LABEL: test_x86_ssse3_pmul_hr_sw_128: ; AVX: ## BB#0: @@ -3717,8 +3737,8 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind { ; AVX-LABEL: movnt_dq: ; AVX: ## BB#0: ; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; AVX-NEXT: vpaddq LCPI246_0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd4,0x05,A,A,A,A] -; AVX-NEXT: ## fixup A - offset: 4, value: LCPI246_0, kind: FK_Data_4 +; AVX-NEXT: vpaddq LCPI247_0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd4,0x05,A,A,A,A] +; AVX-NEXT: ## fixup A - offset: 4, value: LCPI247_0, kind: FK_Data_4 ; AVX-NEXT: vmovntdq %ymm0, (%eax) ## encoding: [0xc5,0xfd,0xe7,0x00] ; AVX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; AVX-NEXT: retl ## encoding: [0xc3] @@ -3726,8 +3746,8 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind { ; AVX512VL-LABEL: movnt_dq: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; AVX512VL-NEXT: vpaddq LCPI246_0, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI246_0, kind: FK_Data_4 +; AVX512VL-NEXT: vpaddq LCPI247_0, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI247_0, kind: FK_Data_4 ; AVX512VL-NEXT: vmovntdq %ymm0, (%eax) ## encoding: [0x62,0xf1,0x7d,0x28,0xe7,0x00] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %a2 = add <2 x i64> %a1, diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll index 0ca67e2..c8c0e10 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -756,6 +756,24 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) { } declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone +; Make sure we don't commute this operation. +define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(<32 x i8>* %ptr, <32 x i8> %a1) { +; AVX2-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0: +; AVX2: ## BB#0: +; AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; AVX2-NEXT: vpmaddubsw (%eax), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x04,0x00] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; AVX512VL-NEXT: vmovdqu8 (%eax), %ymm1 ## encoding: [0x62,0xf1,0x7f,0x28,0x6f,0x08] +; AVX512VL-NEXT: vpmaddubsw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0x04,0xc0] +; AVX512VL-NEXT: retl ## encoding: [0xc3] + %a0 = load <32 x i8>, <32 x i8>* %ptr + %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) { ; AVX2-LABEL: test_x86_avx2_pmul_hr_sw: @@ -1356,18 +1374,18 @@ define <4 x i32> @test_x86_avx2_psrav_d_const(<4 x i32> %a0, <4 x i32> %a1) { ; AVX2: ## BB#0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23] ; AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI90_0, kind: FK_Data_4 -; AVX2-NEXT: vpsravd LCPI90_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI90_1, kind: FK_Data_4 +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI91_0, kind: FK_Data_4 +; AVX2-NEXT: vpsravd LCPI91_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] +; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI91_1, kind: FK_Data_4 ; AVX2-NEXT: retl ## encoding: [0xc3] ; ; AVX512VL-LABEL: test_x86_avx2_psrav_d_const: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: vmovdqa32 {{.*#+}} xmm0 = [2,9,4294967284,23] ; AVX512VL-NEXT: ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI90_0, kind: FK_Data_4 -; AVX512VL-NEXT: vpsravd LCPI90_1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x46,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI90_1, kind: FK_Data_4 +; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI91_0, kind: FK_Data_4 +; AVX512VL-NEXT: vpsravd LCPI91_1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x46,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI91_1, kind: FK_Data_4 ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> , <4 x i32> ) ret <4 x i32> %res @@ -1393,18 +1411,18 @@ define <8 x i32> @test_x86_avx2_psrav_d_256_const(<8 x i32> %a0, <8 x i32> %a1) ; AVX2: ## BB#0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] ; AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI92_0, kind: FK_Data_4 -; AVX2-NEXT: vpsravd LCPI92_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI92_1, kind: FK_Data_4 +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI93_0, kind: FK_Data_4 +; AVX2-NEXT: vpsravd LCPI93_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] +; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI93_1, kind: FK_Data_4 ; AVX2-NEXT: retl ## encoding: [0xc3] ; ; AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] ; AVX512VL-NEXT: ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI92_0, kind: FK_Data_4 -; AVX512VL-NEXT: vpsravd LCPI92_1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI92_1, kind: FK_Data_4 +; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI93_0, kind: FK_Data_4 +; AVX512VL-NEXT: vpsravd LCPI93_1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI93_1, kind: FK_Data_4 ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> , <8 x i32> ) ret <8 x i32> %res -- 2.7.4