;
; SKX-LABEL: test31_commute:
; SKX: ## %bb.0:
-; SKX-NEXT: vmovupd (%rdi), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x10,0x17]
-; SKX-NEXT: vcmpltpd %xmm0, %xmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x08,0xc2,0xc8,0x01]
+; SKX-NEXT: vcmpgtpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test32_commute:
; SKX: ## %bb.0:
-; SKX-NEXT: vmovupd (%rdi), %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x10,0x17]
-; SKX-NEXT: vcmpltpd %ymm0, %ymm2, %k1 ## encoding: [0x62,0xf1,0xed,0x28,0xc2,0xc8,0x01]
+; SKX-NEXT: vcmpgtpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
define <8 x double> @test33_commute(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
; CHECK-LABEL: test33_commute:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vmovupd (%rdi), %zmm2 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x17]
-; CHECK-NEXT: vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01]
+; CHECK-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x0e]
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load <8 x double>, <8 x double>* %yp, align 4
;
; SKX-LABEL: test34_commute:
; SKX: ## %bb.0:
-; SKX-NEXT: vmovups (%rdi), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x17]
-; SKX-NEXT: vcmpltps %xmm0, %xmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x08,0xc2,0xc8,0x01]
+; SKX-NEXT: vcmpgtps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%y = load <4 x float>, <4 x float>* %yp, align 4
;
; SKX-LABEL: test35_commute:
; SKX: ## %bb.0:
-; SKX-NEXT: vmovups (%rdi), %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x17]
-; SKX-NEXT: vcmpltps %ymm0, %ymm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x28,0xc2,0xc8,0x01]
+; SKX-NEXT: vcmpgtps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
define <16 x float> @test36_commute(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
; CHECK-LABEL: test36_commute:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vmovups (%rdi), %zmm2 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x17]
-; CHECK-NEXT: vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01]
+; CHECK-NEXT: vcmpgtps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x0e]
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load <16 x float>, <16 x float>* %yp, align 4
define <8 x double> @test37_commute(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
; CHECK-LABEL: test37_commute:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vbroadcastsd (%rdi), %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x17]
-; CHECK-NEXT: vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01]
+; CHECK-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test38_commute:
; SKX: ## %bb.0:
-; SKX-NEXT: vbroadcastsd (%rdi), %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x17]
-; SKX-NEXT: vcmpltpd %ymm0, %ymm2, %k1 ## encoding: [0x62,0xf1,0xed,0x28,0xc2,0xc8,0x01]
+; SKX-NEXT: vcmpgtpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test39_commute:
; SKX: ## %bb.0:
-; SKX-NEXT: vmovddup (%rdi), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x17]
-; SKX-NEXT: ## xmm2 = mem[0,0]
-; SKX-NEXT: vcmpltpd %xmm0, %xmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x08,0xc2,0xc8,0x01]
+; SKX-NEXT: vcmpgtpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
define <16 x float> @test40_commute(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind {
; CHECK-LABEL: test40_commute:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vbroadcastss (%rdi), %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0x17]
-; CHECK-NEXT: vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01]
+; CHECK-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test41_commute:
; SKX: ## %bb.0:
-; SKX-NEXT: vbroadcastss (%rdi), %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x17]
-; SKX-NEXT: vcmpltps %ymm0, %ymm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x28,0xc2,0xc8,0x01]
+; SKX-NEXT: vcmpgtps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test42_commute:
; SKX: ## %bb.0:
-; SKX-NEXT: vbroadcastss (%rdi), %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x17]
-; SKX-NEXT: vcmpltps %xmm0, %xmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x08,0xc2,0xc8,0x01]
+; SKX-NEXT: vcmpgtps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
; KNL: ## %bb.0:
; KNL-NEXT: vpmovsxwq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd2]
; KNL-NEXT: vpsllq $63, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf2,0x3f]
-; KNL-NEXT: vbroadcastsd (%rdi), %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x1f]
-; KNL-NEXT: vcmpltpd %zmm0, %zmm3, %k1 ## encoding: [0x62,0xf1,0xe5,0x48,0xc2,0xc8,0x01]
+; KNL-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x27,0xca]
; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; KNL-NEXT: retq ## encoding: [0xc3]
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsllw $15, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
; AVX512BW-NEXT: vpmovw2m %zmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x29,0xca]
-; AVX512BW-NEXT: vbroadcastsd (%rdi), %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x17]
-; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0xed,0x49,0xc2,0xc8,0x01]
+; AVX512BW-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x0e]
; AVX512BW-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512BW-NEXT: retq ## encoding: [0xc3]
;
; SKX: ## %bb.0:
; SKX-NEXT: vpsllw $15, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
; SKX-NEXT: vpmovw2m %xmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xca]
-; SKX-NEXT: vbroadcastsd (%rdi), %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x17]
-; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0xed,0x49,0xc2,0xc8,0x01]
+; SKX-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
;
; AVX512-LABEL: commute_cmpps_eq_zmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovaps (%rdi), %zmm1
-; AVX512-NEXT: vcmpeqps %zmm0, %zmm1, %k0
+; AVX512-NEXT: vcmpeqps (%rdi), %zmm0, %k0
; AVX512-NEXT: vpmovm2d %k0, %zmm0
; AVX512-NEXT: retq
%1 = load <16 x float>, <16 x float>* %a0
;
; AVX512-LABEL: commute_cmpps_ne_zmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovaps (%rdi), %zmm1
-; AVX512-NEXT: vcmpneqps %zmm0, %zmm1, %k0
+; AVX512-NEXT: vcmpneqps (%rdi), %zmm0, %k0
; AVX512-NEXT: vpmovm2d %k0, %zmm0
; AVX512-NEXT: retq
%1 = load <16 x float>, <16 x float>* %a0
;
; AVX512-LABEL: commute_cmpps_ord_zmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovaps (%rdi), %zmm1
-; AVX512-NEXT: vcmpordps %zmm0, %zmm1, %k0
+; AVX512-NEXT: vcmpordps (%rdi), %zmm0, %k0
; AVX512-NEXT: vpmovm2d %k0, %zmm0
; AVX512-NEXT: retq
%1 = load <16 x float>, <16 x float>* %a0
;
; AVX512-LABEL: commute_cmpps_uno_zmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovaps (%rdi), %zmm1
-; AVX512-NEXT: vcmpunordps %zmm0, %zmm1, %k0
+; AVX512-NEXT: vcmpunordps (%rdi), %zmm0, %k0
; AVX512-NEXT: vpmovm2d %k0, %zmm0
; AVX512-NEXT: retq
%1 = load <16 x float>, <16 x float>* %a0
;
; AVX512-LABEL: commute_cmpps_ueq_zmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovaps (%rdi), %zmm1
-; AVX512-NEXT: vcmpeq_uqps %zmm0, %zmm1, %k0
+; AVX512-NEXT: vcmpeq_uqps (%rdi), %zmm0, %k0
; AVX512-NEXT: vpmovm2d %k0, %zmm0
; AVX512-NEXT: retq
%1 = load <16 x float>, <16 x float>* %a0
;
; AVX512-LABEL: commute_cmpps_one_zmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovaps (%rdi), %zmm1
-; AVX512-NEXT: vcmpneq_oqps %zmm0, %zmm1, %k0
+; AVX512-NEXT: vcmpneq_oqps (%rdi), %zmm0, %k0
; AVX512-NEXT: vpmovm2d %k0, %zmm0
; AVX512-NEXT: retq
%1 = load <16 x float>, <16 x float>* %a0
;
; AVX512-LABEL: commute_cmpps_lt_zmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovaps (%rdi), %zmm1
-; AVX512-NEXT: vcmpltps %zmm0, %zmm1, %k0
+; AVX512-NEXT: vcmpgtps (%rdi), %zmm0, %k0
; AVX512-NEXT: vpmovm2d %k0, %zmm0
; AVX512-NEXT: retq
%1 = load <16 x float>, <16 x float>* %a0
;
; AVX512-LABEL: commute_cmpps_le_zmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovaps (%rdi), %zmm1
-; AVX512-NEXT: vcmpleps %zmm0, %zmm1, %k0
+; AVX512-NEXT: vcmpgeps (%rdi), %zmm0, %k0
; AVX512-NEXT: vpmovm2d %k0, %zmm0
; AVX512-NEXT: retq
%1 = load <16 x float>, <16 x float>* %a0
;
; AVX512-LABEL: commute_cmppd_eq_zmmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovapd (%rdi), %zmm1
-; AVX512-NEXT: vcmpeqpd %zmm0, %zmm1, %k0
+; AVX512-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; AVX512-NEXT: vpmovm2q %k0, %zmm0
; AVX512-NEXT: retq
%1 = load <8 x double>, <8 x double>* %a0
;
; AVX512-LABEL: commute_cmppd_ne_zmmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovapd (%rdi), %zmm1
-; AVX512-NEXT: vcmpneqpd %zmm0, %zmm1, %k0
+; AVX512-NEXT: vcmpneqpd (%rdi), %zmm0, %k0
; AVX512-NEXT: vpmovm2q %k0, %zmm0
; AVX512-NEXT: retq
%1 = load <8 x double>, <8 x double>* %a0
;
; AVX512-LABEL: commute_cmppd_ord_zmmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovapd (%rdi), %zmm1
-; AVX512-NEXT: vcmpordpd %zmm0, %zmm1, %k0
+; AVX512-NEXT: vcmpordpd (%rdi), %zmm0, %k0
; AVX512-NEXT: vpmovm2q %k0, %zmm0
; AVX512-NEXT: retq
%1 = load <8 x double>, <8 x double>* %a0
;
; AVX512-LABEL: commute_cmppd_uno_zmmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovapd (%rdi), %zmm1
-; AVX512-NEXT: vcmpunordpd %zmm0, %zmm1, %k0
+; AVX512-NEXT: vcmpunordpd (%rdi), %zmm0, %k0
; AVX512-NEXT: vpmovm2q %k0, %zmm0
; AVX512-NEXT: retq
%1 = load <8 x double>, <8 x double>* %a0
;
; AVX512-LABEL: commute_cmppd_ueq_zmmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovapd (%rdi), %zmm1
-; AVX512-NEXT: vcmpeq_uqpd %zmm0, %zmm1, %k0
+; AVX512-NEXT: vcmpeq_uqpd (%rdi), %zmm0, %k0
; AVX512-NEXT: vpmovm2q %k0, %zmm0
; AVX512-NEXT: retq
%1 = load <8 x double>, <8 x double>* %a0
;
; AVX512-LABEL: commute_cmppd_one_zmmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovapd (%rdi), %zmm1
-; AVX512-NEXT: vcmpneq_oqpd %zmm0, %zmm1, %k0
+; AVX512-NEXT: vcmpneq_oqpd (%rdi), %zmm0, %k0
; AVX512-NEXT: vpmovm2q %k0, %zmm0
; AVX512-NEXT: retq
%1 = load <8 x double>, <8 x double>* %a0
;
; AVX512-LABEL: commute_cmppd_lt_zmmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovapd (%rdi), %zmm1
-; AVX512-NEXT: vcmpltpd %zmm0, %zmm1, %k0
+; AVX512-NEXT: vcmpgtpd (%rdi), %zmm0, %k0
; AVX512-NEXT: vpmovm2q %k0, %zmm0
; AVX512-NEXT: retq
%1 = load <8 x double>, <8 x double>* %a0
;
; AVX512-LABEL: commute_cmppd_le_zmmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovapd (%rdi), %zmm1
-; AVX512-NEXT: vcmplepd %zmm0, %zmm1, %k0
+; AVX512-NEXT: vcmpgepd (%rdi), %zmm0, %k0
; AVX512-NEXT: vpmovm2q %k0, %zmm0
; AVX512-NEXT: retq
%1 = load <8 x double>, <8 x double>* %a0