%res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
ret <4 x i32> %res
}
-define <4 x i32> @test_8xi32_to_4xi32_perm_mask3(<8 x i32> %vec) {\r
-; CHECK-LABEL: test_8xi32_to_4xi32_perm_mask3:\r
-; CHECK: # %bb.0:\r
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1\r
-; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]\r
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,1]\r
-; CHECK-NEXT: vzeroupper\r
-; CHECK-NEXT: retq\r
- %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <4 x i32> <i32 5, i32 3, i32 2, i32 5>\r
+define <4 x i32> @test_8xi32_to_4xi32_perm_mask3(<8 x i32> %vec) {
+; CHECK-LABEL: test_8xi32_to_4xi32_perm_mask3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,1]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <4 x i32> <i32 5, i32 3, i32 2, i32 5>
ret <4 x i32> %res
}
define <4 x i32> @test_masked_8xi32_to_4xi32_perm_mask3(<8 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
ret <4 x float> %res
}
-define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mem_mask1(<8 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {\r
-; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mem_mask1:\r
-; CHECK: # %bb.0:\r
-; CHECK-NEXT: vmovaps (%rdi), %ymm2\r
-; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3\r
-; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3]\r
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3\r
-; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1\r
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = xmm2[2,3,3,2]\r
+define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mem_mask1(<8 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
+; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mem_mask1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps (%rdi), %ymm2
+; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3
+; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3]
+; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = xmm2[2,3,3,2]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
ret <4 x float> %res
}
-define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mem_mask1(<8 x float>* %vp, <4 x float> %mask) {\r
-; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mem_mask1:\r
-; CHECK: # %bb.0:\r
-; CHECK-NEXT: vmovaps (%rdi), %ymm1\r
-; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2\r
-; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3]\r
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2\r
-; CHECK-NEXT: vcmpeqps %xmm2, %xmm0, %k1\r
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm1[2,3,3,2]\r
+define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mem_mask1(<8 x float>* %vp, <4 x float> %mask) {
+; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mem_mask1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps (%rdi), %ymm1
+; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2
+; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3]
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %xmm2, %xmm0, %k1
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm1[2,3,3,2]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
%res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
ret <4 x float> %res
}
-define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {\r
-; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mask1:\r
-; CHECK: # %bb.0:\r
-; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3\r
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0\r
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,2]\r
-; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm3[0],xmm0[1],xmm3[2],xmm0[3]\r
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3\r
-; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1\r
-; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}\r
+define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
+; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mask1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,2]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm3[0],xmm0[1],xmm3[2],xmm0[3]
+; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
+; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <4 x i32> <i32 8, i32 6, i32 10, i32 6>
ret <4 x float> %res
}
-define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec, <4 x float> %mask) {\r
-; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mask1:\r
-; CHECK: # %bb.0:\r
-; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2\r
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0\r
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,2]\r
-; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]\r
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2\r
-; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1\r
-; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}\r
+define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec, <4 x float> %mask) {
+; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mask1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,2]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
+; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <4 x i32> <i32 8, i32 6, i32 10, i32 6>
%res = shufflevector <16 x float> %vec, <16 x float> undef, <4 x i32> <i32 10, i32 2, i32 11, i32 6>
ret <4 x float> %res
}
-define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask3(<16 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {\r
-; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mask3:\r
-; CHECK: # %bb.0:\r
-; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [0,2,4,6,4,6,6,7]\r
-; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm3\r
-; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0\r
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]\r
-; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3]\r
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3\r
-; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1\r
-; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}\r
+define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask3(<16 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
+; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mask3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [0,2,4,6,4,6,6,7]
+; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm3
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3]
+; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
+; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <4 x i32> <i32 10, i32 2, i32 11, i32 6>
ret <4 x float> %res
}
-define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask3(<16 x float> %vec, <4 x float> %mask) {\r
-; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mask3:\r
-; CHECK: # %bb.0:\r
-; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]\r
-; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm2\r
-; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0\r
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]\r
-; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]\r
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2\r
-; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1\r
-; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}\r
+define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask3(<16 x float> %vec, <4 x float> %mask) {
+; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mask3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
+; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm2
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
+; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <4 x i32> <i32 10, i32 2, i32 11, i32 6>
%res = shufflevector <16 x float> %vec, <16 x float> undef, <4 x i32> <i32 14, i32 6, i32 7, i32 11>
ret <4 x float> %res
}
-define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {\r
-; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mem_mask0:\r
-; CHECK: # %bb.0:\r
-; CHECK-NEXT: vmovaps (%rdi), %zmm2\r
-; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3\r
-; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[0,2,3,3]\r
-; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm2\r
-; CHECK-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[3,1,2,3]\r
-; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3]\r
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3\r
-; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1\r
-; CHECK-NEXT: vmovaps %xmm2, %xmm0 {%k1}\r
+define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
+; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mem_mask0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps (%rdi), %zmm2
+; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3
+; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[0,2,3,3]
+; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm2
+; CHECK-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[3,1,2,3]
+; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3]
+; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
+; CHECK-NEXT: vmovaps %xmm2, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
ret <4 x float> %res
}
-define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp, <4 x float> %mask) {\r
-; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mem_mask0:\r
-; CHECK: # %bb.0:\r
-; CHECK-NEXT: vmovaps (%rdi), %zmm1\r
-; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2\r
-; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,3,3]\r
-; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1\r
-; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]\r
-; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2],xmm1[3]\r
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2\r
-; CHECK-NEXT: vcmpeqps %xmm2, %xmm0, %k1\r
-; CHECK-NEXT: vmovaps %xmm1, %xmm0 {%k1} {z}\r
+define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp, <4 x float> %mask) {
+; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mem_mask0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps (%rdi), %zmm1
+; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2
+; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,3,3]
+; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1
+; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
+; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2],xmm1[3]
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %xmm2, %xmm0, %k1
+; CHECK-NEXT: vmovaps %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp