From 2bde3dcd32b3ce2c8855d13659c6708f4434a985 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 19 Sep 2021 13:12:30 -0700
Subject: [PATCH] [X86] Add test cases for pr51908. NFC

---
 llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll  | 16 +++++++++++
 llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll | 38 +++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 06cab29..29b32e7 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -731,6 +731,22 @@ define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
 }
 declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
 
+; FIXME: We shouldn't commute this operation to fold the load.
+define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(<32 x i8>* %ptr, <32 x i8> %a1) {
+; X86-LABEL: test_x86_avx2_mpsadbw_load_op0:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    vmpsadbw $7, (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x42,0x00,0x07]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_x86_avx2_mpsadbw_load_op0:
+; X64:       # %bb.0:
+; X64-NEXT:    vmpsadbw $7, (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x42,0x07,0x07]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %a0 = load <32 x i8>, <32 x i8>* %ptr
+  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
 
 define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
 ; AVX2-LABEL: test_x86_avx2_packusdw:
diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
index 3218916..66e7836 100644
--- a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
@@ -116,6 +116,44 @@ define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
 }
 declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
 
+; FIXME: We shouldn't commute this operation to fold the load.
+define <8 x i16> @test_x86_sse41_mpsadbw_load_op0(<16 x i8>* %ptr, <16 x i8> %a1) {
+; X86-SSE-LABEL: test_x86_sse41_mpsadbw_load_op0:
+; X86-SSE:       ## %bb.0:
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-SSE-NEXT:    mpsadbw $7, (%eax), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x42,0x00,0x07]
+; X86-SSE-NEXT:    retl ## encoding: [0xc3]
+;
+; X86-AVX1-LABEL: test_x86_sse41_mpsadbw_load_op0:
+; X86-AVX1:       ## %bb.0:
+; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX1-NEXT:    vmpsadbw $7, (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0x00,0x07]
+; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
+;
+; X86-AVX512-LABEL: test_x86_sse41_mpsadbw_load_op0:
+; X86-AVX512:       ## %bb.0:
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX512-NEXT:    vmpsadbw $7, (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0x00,0x07]
+; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
+;
+; X64-SSE-LABEL: test_x86_sse41_mpsadbw_load_op0:
+; X64-SSE:       ## %bb.0:
+; X64-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x42,0x07,0x07]
+; X64-SSE-NEXT:    retq ## encoding: [0xc3]
+;
+; X64-AVX1-LABEL: test_x86_sse41_mpsadbw_load_op0:
+; X64-AVX1:       ## %bb.0:
+; X64-AVX1-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0x07,0x07]
+; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
+;
+; X64-AVX512-LABEL: test_x86_sse41_mpsadbw_load_op0:
+; X64-AVX512:       ## %bb.0:
+; X64-AVX512-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0x07,0x07]
+; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
+  %a0 = load <16 x i8>, <16 x i8>* %ptr
+  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
 
 define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: test_x86_sse41_packusdw:
-- 
2.7.4
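
For reference, MPSADBW is not commutative: the immediate selects a 4-byte
block from the second source operand and sums its absolute differences
against sliding windows taken from the first source operand, so commuting
the operands to fold a load of operand 0 (as the FIXMEs above note the
backend currently does) computes a different result. Below is a minimal
standalone sketch, not part of the patch, showing the two operand orders
disagree; the file name mpsadbw_demo.c is hypothetical, and it assumes the
SSE4.1 intrinsic _mm_mpsadbw_epu8 from <smmintrin.h>.

/* mpsadbw_demo.c: build with e.g. cc -msse4.1 mpsadbw_demo.c */
#include <smmintrin.h>
#include <stdio.h>
#include <string.h>

int main(void) {
    /* Arbitrary operands: a counts up, b is all zeros. */
    __m128i a = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8,
                              9, 10, 11, 12, 13, 14, 15, 16);
    __m128i b = _mm_setzero_si128();

    /* Same immediate as the tests above (imm8 = 7). */
    __m128i fwd = _mm_mpsadbw_epu8(a, b, 7); /* windows from a, block from b */
    __m128i rev = _mm_mpsadbw_epu8(b, a, 7); /* windows from b, block from a */

    /* Prints "identical: 0": the two orders give different results, so the
       backend must not swap the operands just to fold a load of the first. */
    printf("identical: %d\n", memcmp(&fwd, &rev, sizeof fwd) == 0);
    return 0;
}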