From: Craig Topper
Date: Sat, 4 Nov 2017 18:10:03 +0000 (+0000)
Subject: [X86] Teach EVEX->VEX pass to turn SHUFI32X4/SHUFF32X4/SHUFI64X2/SHUFF64X2 into VPERM2...
X-Git-Tag: llvmorg-6.0.0-rc1~4140
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e5d44cefea140226185e9e80cdfbcb4492f30427;p=platform%2Fupstream%2Fllvm.git

[X86] Teach EVEX->VEX pass to turn SHUFI32X4/SHUFF32X4/SHUFI64X2/SHUFF64X2 into VPERM2F128/VPERM2I128.

This recovers some of the tests that were changed by r317403.

llvm-svn: 317410
---

diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86EvexToVex.cpp
index 744510a..6dd4631 100644
--- a/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/llvm/lib/Target/X86/X86EvexToVex.cpp
@@ -171,7 +171,7 @@ static void performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
   case X86::VALIGNDZ128rri:
   case X86::VALIGNDZ128rmi:
   case X86::VALIGNQZ128rri:
-  case X86::VALIGNQZ128rmi:
+  case X86::VALIGNQZ128rmi: {
     assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
            "Unexpected new opcode!");
     unsigned Scale = (Opc == X86::VALIGNQZ128rri ||
@@ -180,6 +180,24 @@ static void performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
     Imm.setImm(Imm.getImm() * Scale);
     break;
   }
+  case X86::VSHUFF32X4Z256rmi:
+  case X86::VSHUFF32X4Z256rri:
+  case X86::VSHUFF64X2Z256rmi:
+  case X86::VSHUFF64X2Z256rri:
+  case X86::VSHUFI32X4Z256rmi:
+  case X86::VSHUFI32X4Z256rri:
+  case X86::VSHUFI64X2Z256rmi:
+  case X86::VSHUFI64X2Z256rri: {
+    assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr ||
+            NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) &&
+           "Unexpected new opcode!");
+    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
+    int64_t ImmVal = Imm.getImm();
+    // Set bit 5, move bit 1 to bit 4, copy bit 0.
+    Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
+    break;
+  }
+  }
 }
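Background for the immediate rewrite above: in the 256-bit VSHUFF/VSHUFI instructions, bit 0 of the immediate selects which 128-bit half of the first source becomes the destination's low lane, and bit 1 selects which half of the second source becomes the high lane. VPERM2F128/VPERM2I128 instead use immediate bits 1:0 to pick the low lane and bits 5:4 to pick the high lane from {src1.lo, src1.hi, src2.lo, src2.hi}. A minimal standalone sketch of the remapping (illustrative code, not part of the commit), checked against the $22 -> $48 changes visible in the tests below:

    // remap_demo.cpp - standalone illustration of the immediate rewrite.
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Same expression as the Imm.setImm(...) call above: set bit 5 so the
    // high lane always comes from the second source, move bit 1 up to the
    // high-lane selector (bit 4), and copy bit 0 as the low-lane selector.
    static int64_t remapImm(int64_t ImmVal) {
      return 0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1);
    }

    int main() {
      // The avx512vl-intrinsics tests use $22 (0b10110): bit 0 = 0 keeps the
      // low half of src1 and bit 1 = 1 takes the high half of src2, i.e.
      // ymm0 = ymm0[0,1],ymm1[2,3] in 64-bit-element notation.
      assert(remapImm(22) == 0x30); // $48, matching the updated CHECK lines.
      printf("new imm = 0x%llx\n", (unsigned long long)remapImm(22));
      return 0;
    }

The remapping is always possible here because the EVEX immediate only ever selects one half from each source, and every such selection is expressible as a VPERM2F128/VPERM2I128 immediate with the lane-zeroing bits (3 and 7) clear.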
diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll
index a3e6a18..44d13db 100644
--- a/llvm/test/CodeGen/X86/avx-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx-schedule.ll
@@ -3447,8 +3447,8 @@ define <4 x double> @test_perm2f128(<4 x double> %a0, <4 x double> %a1, <4 x dou
 ;
 ; SKX-LABEL: test_perm2f128:
 ; SKX: # BB#0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
+; SKX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
+; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
 ; SKX-NEXT: retq # sched: [7:1.00]
 ;
diff --git a/llvm/test/CodeGen/X86/avx2-schedule.ll b/llvm/test/CodeGen/X86/avx2-schedule.ll
index 8febe04..cec8ca9 100644
--- a/llvm/test/CodeGen/X86/avx2-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx2-schedule.ll
@@ -2531,8 +2531,8 @@ define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
 ;
 ; SKX-LABEL: test_perm2i128:
 ; SKX: # BB#0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
+; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
+; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
 ; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
 ; SKX-NEXT: retq # sched: [7:1.00]
 ;
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index 9098ca3..35fecf8 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -2729,8 +2729,8 @@ define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x
 ; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xd1,0x16]
 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc1,0x16]
-; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; CHECK-NEXT: vperm2f128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x30]
+; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
 ; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -2752,7 +2752,7 @@ define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4
 ; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1],ymm1[2,3]
 ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xd1,0x16]
 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3]
-; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc1,0x16]
+; CHECK-NEXT: vperm2f128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x30]
 ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
 ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
@@ -2773,8 +2773,8 @@ define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x43,0xd1,0x16]
 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc1,0x16]
-; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; CHECK-NEXT: vperm2i128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x46,0xc1,0x30]
+; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
 ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
   %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
@@ -2791,7 +2791,7 @@ define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xd1,0x16]
 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3]
-; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc1,0x16]
+; CHECK-NEXT: vperm2i128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x46,0xc1,0x30]
 ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
 ; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
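Note that only the unmasked instruction in each hunk above is compressed; the {%k1} and {%k1} {z} forms keep their longer EVEX encoding because VEX has no opmask fields. A simplified sketch of that legality constraint (assumed shape, for illustration only; the actual checks live in X86EvexToVex.cpp and also reject zmm registers, xmm16-xmm31/ymm16-ymm31, and embedded rounding/broadcast):

    // Illustrative only: any feature with no VEX encoding forces the
    // instruction to stay in EVEX form.
    struct VecInstFeatures {
      bool UsesOpmask;              // {%k1} or {%k1} {z} masking
      bool UsesExtendedRegisters;   // zmm, or xmm16-xmm31 / ymm16-ymm31
      bool UsesRoundingOrBroadcast; // {rn-sae}, {1toN}, ...
    };

    static bool mayCompressToVex(const VecInstFeatures &F) {
      return !F.UsesOpmask && !F.UsesExtendedRegisters &&
             !F.UsesRoundingOrBroadcast;
    }

This is why the %ymm2 and %ymm3 results above still use vshuff32x4/vshufi64x2 while the plain %ymm0 operation becomes vperm2f128/vperm2i128 with a one-byte-shorter encoding.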
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
index cbc190d..7ef5bee 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -3975,7 +3975,7 @@ define <16 x i16> @concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_b
 ;
 ; AVX512VL-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
 ; AVX512VL-NEXT: retq
   %ahi = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index c5c2312..3c69f61 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -1682,17 +1682,11 @@ define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
-; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40>
   ret <32 x i8> %shuffle
 }
@@ -1708,17 +1702,11 @@ define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
-; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
   ret <32 x i8> %shuffle
 }
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 6e0e80b..56567c7 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -418,45 +418,21 @@ define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
 }

 define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_3254:
-; AVX1: # BB#0:
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_3254:
-; AVX2: # BB#0:
-; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_3254:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_3254:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT: retq
   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
   ret <4 x double> %shuffle
 }

 define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_3276:
-; AVX1: # BB#0:
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_3276:
-; AVX2: # BB#0:
-; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_3276:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_3276:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT: retq
   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
   ret <4 x double> %shuffle
 }
@@ -1077,7 +1053,7 @@ define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512VL-LABEL: shuffle_v4i64_3254:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
@@ -1099,7 +1075,7 @@ define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512VL-LABEL: shuffle_v4i64_3276:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
index 38891b46..e4234c0 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -768,33 +768,21 @@ define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
 }

 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_7654fedc:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_7654fedc:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_7654fedc:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
   ret <8 x float> %shuffle
 }

 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_fedc7654:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
-; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_fedc7654:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
-; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_fedc7654:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
   ret <8 x float> %shuffle
 }
@@ -1809,7 +1797,7 @@ define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX512VL-LABEL: shuffle_v8i32_7654fedc:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
@@ -1825,7 +1813,7 @@ define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX512VL-LABEL: shuffle_v8i32_fedc7654:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
@@ -2208,7 +2196,7 @@ define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) {
 ;
 ; AVX512VL-LABEL: concat_v8i32_4567CDEF_bc:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
 ; AVX512VL-NEXT: retq
   %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
@@ -2220,15 +2208,10 @@ define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) {
-; AVX1OR2-LABEL: concat_v8f32_4567CDEF_bc:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: concat_v8f32_4567CDEF_bc:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: concat_v8f32_4567CDEF_bc:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT: retq
   %a0 = bitcast <8 x float> %f0 to <4 x i64>
   %a1 = bitcast <8 x float> %f1 to <8 x i32>
   %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
diff --git a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
index 848e59c..05f30fa 100644
--- a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
@@ -155,6 +155,14 @@ void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table,
     {"VALIGNQZ128rri", "VPALIGNRrri", true},
     {"VALIGNDZ128rmi", "VPALIGNRrmi", true},
     {"VALIGNQZ128rmi", "VPALIGNRrmi", true},
+    {"VSHUFF32X4Z256rmi", "VPERM2F128rm", false},
+    {"VSHUFF32X4Z256rri", "VPERM2F128rr", false},
+    {"VSHUFF64X2Z256rmi", "VPERM2F128rm", false},
+    {"VSHUFF64X2Z256rri", "VPERM2F128rr", false},
+    {"VSHUFI32X4Z256rmi", "VPERM2I128rm", false},
+    {"VSHUFI32X4Z256rri", "VPERM2I128rr", false},
+    {"VSHUFI64X2Z256rmi", "VPERM2I128rm", false},
+    {"VSHUFI64X2Z256rri", "VPERM2I128rr", false},
   };

   // Print the manually added entries
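The trailing boolean in each manual entry routes it to the generated 128-bit or 256-bit compression table (true for the VALIGN 128-bit entries, false for these 256-bit VSHUF entries). For orientation, a sketch of what a generated row plausibly looks like; the struct and table names are assumptions modeled on the generated X86GenEVEX2VEXTables output, not quoted from this commit, and the dummy opcode values stand in for LLVM's real enum:

    #include <cstdint>

    // Dummy stand-ins for LLVM's X86 opcode enum, so the sketch compiles.
    namespace X86 {
    enum : uint16_t {
      VSHUFF32X4Z256rri = 1, VPERM2F128rr = 2,
      VSHUFI64X2Z256rmi = 3, VPERM2I128rm = 4,
    };
    } // namespace X86

    // Assumed shape of a generated row: the pass searches the table by
    // EVEX opcode, swaps in the VEX opcode, and then calls
    // performCustomAdjustments() to fix up the immediate as shown earlier.
    struct X86EvexToVexCompressTableEntry {
      uint16_t EvexOpcode;
      uint16_t VexOpcode;
    };

    static const X86EvexToVexCompressTableEntry X86EvexToVex256CompressTable[] = {
      // ...
      {X86::VSHUFF32X4Z256rri, X86::VPERM2F128rr},
      {X86::VSHUFI64X2Z256rmi, X86::VPERM2I128rm},
      // ...
    };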