}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
-; FIXME: We shouldn't commute this operation to fold the load.
+; We shouldn't commute this operation to fold the load.
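+; (MPSADBW is not commutative: imm8[1:0] selects the 32-bit block of the
+; second source while imm8[2] selects the 11-byte window offset in the
+; first, so swapping the sources changes the result.)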
define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(<32 x i8>* %ptr, <32 x i8> %a1) {
-; X86-LABEL: test_x86_avx2_mpsadbw_load_op0:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmpsadbw $7, (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x42,0x00,0x07]
-; X86-NEXT: retl # encoding: [0xc3]
+; X86-AVX-LABEL: test_x86_avx2_mpsadbw_load_op0:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX-NEXT: vmovdqa (%eax), %ymm1 # encoding: [0xc5,0xfd,0x6f,0x08]
+; X86-AVX-NEXT: vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
+; X86-AVX-NEXT: retl # encoding: [0xc3]
;
-; X64-LABEL: test_x86_avx2_mpsadbw_load_op0:
-; X64: # %bb.0:
-; X64-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x42,0x07,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
+; X86-AVX512VL-LABEL: test_x86_avx2_mpsadbw_load_op0:
+; X86-AVX512VL: # %bb.0:
+; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX512VL-NEXT: vmovdqa (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x08]
+; X86-AVX512VL-NEXT: vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
+; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
+;
+; X64-AVX-LABEL: test_x86_avx2_mpsadbw_load_op0:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqa (%rdi), %ymm1 # encoding: [0xc5,0xfd,0x6f,0x0f]
+; X64-AVX-NEXT: vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
+; X64-AVX-NEXT: retq # encoding: [0xc3]
+;
+; X64-AVX512VL-LABEL: test_x86_avx2_mpsadbw_load_op0:
+; X64-AVX512VL: # %bb.0:
+; X64-AVX512VL-NEXT: vmovdqa (%rdi), %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x0f]
+; X64-AVX512VL-NEXT: vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
+; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%a0 = load <32 x i8>, <32 x i8>* %ptr
%res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
ret <16 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
-; FIXME: We shouldn't commute this operation to fold the load.
+; We shouldn't commute this operation to fold the load.
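+; (mpsadbw's imm8 assigns different roles to each source: bits [1:0] pick
+; the 32-bit block of the second operand, bit [2] the window offset in the
+; first, so the operands cannot simply be swapped.)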
define <8 x i16> @test_x86_sse41_mpsadbw_load_op0(<16 x i8>* %ptr, <16 x i8> %a1) {
; X86-SSE-LABEL: test_x86_sse41_mpsadbw_load_op0:
; X86-SSE: ## %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT: mpsadbw $7, (%eax), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x42,0x00,0x07]
+; X86-SSE-NEXT: movdqa (%eax), %xmm1 ## encoding: [0x66,0x0f,0x6f,0x08]
+; X86-SSE-NEXT: mpsadbw $7, %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x3a,0x42,0xc8,0x07]
+; X86-SSE-NEXT: movdqa %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x6f,0xc1]
; X86-SSE-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse41_mpsadbw_load_op0:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT: vmpsadbw $7, (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0x00,0x07]
+; X86-AVX1-NEXT: vmovdqa (%eax), %xmm1 ## encoding: [0xc5,0xf9,0x6f,0x08]
+; X86-AVX1-NEXT: vmpsadbw $7, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x42,0xc0,0x07]
; X86-AVX1-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse41_mpsadbw_load_op0:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT: vmpsadbw $7, (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0x00,0x07]
+; X86-AVX512-NEXT: vmovdqa (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x08]
+; X86-AVX512-NEXT: vmpsadbw $7, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x42,0xc0,0x07]
; X86-AVX512-NEXT: retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse41_mpsadbw_load_op0:
; X64-SSE: ## %bb.0:
-; X64-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x42,0x07,0x07]
+; X64-SSE-NEXT: movdqa (%rdi), %xmm1 ## encoding: [0x66,0x0f,0x6f,0x0f]
+; X64-SSE-NEXT: mpsadbw $7, %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x3a,0x42,0xc8,0x07]
+; X64-SSE-NEXT: movdqa %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x6f,0xc1]
; X64-SSE-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse41_mpsadbw_load_op0:
; X64-AVX1: ## %bb.0:
-; X64-AVX1-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0x07,0x07]
+; X64-AVX1-NEXT: vmovdqa (%rdi), %xmm1 ## encoding: [0xc5,0xf9,0x6f,0x0f]
+; X64-AVX1-NEXT: vmpsadbw $7, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x42,0xc0,0x07]
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse41_mpsadbw_load_op0:
; X64-AVX512: ## %bb.0:
-; X64-AVX512-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0x07,0x07]
+; X64-AVX512-NEXT: vmovdqa (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x0f]
+; X64-AVX512-NEXT: vmpsadbw $7, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x42,0xc0,0x07]
; X64-AVX512-NEXT: retq ## encoding: [0xc3]
%a0 = load <16 x i8>, <16 x i8>* %ptr
%res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]