From 29e646fe655cb63a23a08e7213599e51ef564ab5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <llvm-dev@redking.me.uk> Date: Sun, 9 Feb 2020 21:49:37 +0000 Subject: [PATCH] [X86] combineConcatVectorOps - combine VROTLI/VROTRI ops Fix issue mentioned on rGe82e17d4d4ca - non-AVX512BW targets failed to concatenate 256-bit rotations back to 512-bits (split during shuffle lowering as they don't have v32i16/v64i8 types). --- llvm/lib/Target/X86/X86ISelLowering.cpp | 14 +++++++++++ llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll | 32 +++++++------------------ llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll | 29 ++++------------------ 3 files changed, 26 insertions(+), 49 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e7fc997..ad1055e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -45952,6 +45952,20 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, return DAG.getBitcast(VT, Res); } break; + case X86ISD::VROTLI: + case X86ISD::VROTRI: + if (VT.is512BitVector() && Subtarget.useAVX512Regs() && + llvm::all_of(Ops, [Op0](SDValue Op) { + return Op0.getOperand(1) == Op.getOperand(1); + })) { + SmallVector<SDValue, 2> Src; + for (unsigned i = 0; i != NumOps; ++i) + Src.push_back(Ops[i].getOperand(0)); + return DAG.getNode(Op0.getOpcode(), DL, VT, + DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Src), + Op0.getOperand(1)); + } + break; case X86ISD::PACKUS: if (NumOps == 2 && VT.is256BitVector() && Subtarget.hasInt256()) { SmallVector<SDValue, 2> LHS, RHS; diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll index ee528e5..c513a57 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll @@ -198,35 +198,19 @@ define <32 x i16> @shuffle_v32i16_1_1_0_0_5_5_4_4_9_9_11_11_13_13_12_12_17_17_19 } define <32 x i16>
@shuffle_v32i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14_17_16_19_18_21_20_23_22_25_24_27_26_29_28_31_30(<32 x i16> %a) { -; KNL-LABEL: shuffle_v32i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14_17_16_19_18_21_20_23_22_25_24_27_26_29_28_31_30: -; KNL: ## %bb.0: -; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; KNL-NEXT: vprold $16, %zmm1, %zmm1 -; KNL-NEXT: vprold $16, %zmm0, %zmm0 -; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; KNL-NEXT: retq -; -; SKX-LABEL: shuffle_v32i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14_17_16_19_18_21_20_23_22_25_24_27_26_29_28_31_30: -; SKX: ## %bb.0: -; SKX-NEXT: vprold $16, %zmm0, %zmm0 -; SKX-NEXT: retq +; ALL-LABEL: shuffle_v32i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14_17_16_19_18_21_20_23_22_25_24_27_26_29_28_31_30: +; ALL: ## %bb.0: +; ALL-NEXT: vprold $16, %zmm0, %zmm0 +; ALL-NEXT: retq %shuffle = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14, i32 17, i32 16, i32 19, i32 18, i32 21, i32 20, i32 23, i32 22, i32 25, i32 24, i32 27, i32 26, i32 29, i32 28, i32 31, i32 30> ret <32 x i16> %shuffle } define <32 x i16> @shuffle_v32i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30(<32 x i16> %a) { -; KNL-LABEL: shuffle_v32i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30: -; KNL: ## %bb.0: -; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; KNL-NEXT: vprolq $48, %zmm1, %zmm1 -; KNL-NEXT: vprolq $48, %zmm0, %zmm0 -; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; KNL-NEXT: retq -; -; SKX-LABEL: shuffle_v32i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30: -; SKX: ## %bb.0: -; SKX-NEXT: vprolq $48, %zmm0, %zmm0 -; SKX-NEXT: retq +; ALL-LABEL: shuffle_v32i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30: +; ALL: ## %bb.0: +; ALL-NEXT: vprolq $48, %zmm0, %zmm0 +; ALL-NEXT: retq %shuffle = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 19, i32 16, i32 17, i32 18, i32 23, i32 20, i32 21, i32 22, i32 27, i32 24, i32 25, i32 26, i32 31, i32 28, i32 29, i32 30> ret <32 x i16> %shuffle } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll index 365dbb8..8cbac72 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll @@ -200,31 +200,10 @@ define <64 x i8> @shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_ ; PR44379 define <64 x i8> @shuffle_v64i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25_34_35_36_37_38_39_32_33_42_43_44_45_46_47_40_41_50_51_52_53_54_55_48_49_58_59_60_61_62_63_56_57(<64 x i8> %a) { -; AVX512F-LABEL: shuffle_v64i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25_34_35_36_37_38_39_32_33_42_43_44_45_46_47_40_41_50_51_52_53_54_55_48_49_58_59_60_61_62_63_56_57: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vprolq $16, %zmm1, %zmm1 -; AVX512F-NEXT: vprolq $16, %zmm0, %zmm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: shuffle_v64i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25_34_35_36_37_38_39_32_33_42_43_44_45_46_47_40_41_50_51_52_53_54_55_48_49_58_59_60_61_62_63_56_57: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vprolq $16, %zmm0, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512DQ-LABEL: shuffle_v64i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25_34_35_36_37_38_39_32_33_42_43_44_45_46_47_40_41_50_51_52_53_54_55_48_49_58_59_60_61_62_63_56_57: -; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512DQ-NEXT: vprolq $16, %zmm1, %zmm1 -; AVX512DQ-NEXT: vprolq $16, %zmm0, %zmm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512DQ-NEXT: retq -; -; AVX512VBMI-LABEL: 
shuffle_v64i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25_34_35_36_37_38_39_32_33_42_43_44_45_46_47_40_41_50_51_52_53_54_55_48_49_58_59_60_61_62_63_56_57: -; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vprolq $16, %zmm0, %zmm0 -; AVX512VBMI-NEXT: retq +; ALL-LABEL: shuffle_v64i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25_34_35_36_37_38_39_32_33_42_43_44_45_46_47_40_41_50_51_52_53_54_55_48_49_58_59_60_61_62_63_56_57: +; ALL: # %bb.0: +; ALL-NEXT: vprolq $16, %zmm0, %zmm0 +; ALL-NEXT: retq %shuffle = shufflevector <64 x i8> %a, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 24, i32 25, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 32, i32 33, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 40, i32 41, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 48, i32 49, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 56, i32 57> ret <64 x i8> %shuffle } -- 2.7.4