From 7ccd21ce4f8cadd364beb56d07c98669a7f71791 Mon Sep 17 00:00:00 2001
From: Han Zhu
Date: Wed, 5 Apr 2023 15:01:04 -0700
Subject: [PATCH] [isel] Pre-commit test for pr61964 fix

---
Reviewer notes (this section is ignored by git-am and is not part of the commit):

* This patch only adds llvm/test/CodeGen/X86/pr61964.ll. The test holds a
  single function, @splitTransposeDecode_8_avx2, which shuffles two <16 x i16>
  inputs in two stages (a 16-bit shufflevector, then a 32-bit shufflevector on
  the bitcast result) and returns the pair of <8 x i32> results. The CHECK
  lines are autogenerated by utils/update_llc_test_checks.py for the nine RUN
  configurations listed at the top of the test.
* NOTE(review): the "<i32 ...>" mask operands of the four shufflevector
  instructions were missing from the copy of the patch this was restored from.
  They have been reconstructed so that the composition of the two stages
  equals the AVX512VL vpermi2w index vectors in the CHECK lines
  ([0,16,8,24,1,17,9,25,...] and [4,20,12,28,5,21,13,29,...]). The CHECK lines
  pin only that composition, not each individual mask - confirm against the
  upstream test before applying.
* The author's e-mail address appears to have been lost from the From: header
  as well; it could not be recovered and has been left as found.

 llvm/test/CodeGen/X86/pr61964.ll | 110 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/pr61964.ll

diff --git a/llvm/test/CodeGen/X86/pr61964.ll b/llvm/test/CodeGen/X86/pr61964.ll
new file mode 100644
index 0000000..6cc9d6d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr61964.ll
@@ -0,0 +1,110 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX1OR2,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX1OR2,AVX2OR512VL,AVX2,AVX2-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX1OR2,AVX2OR512VL,AVX2,AVX2-FAST,AVX2-FAST-ALL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX1OR2,AVX2OR512VL,AVX2,AVX2-FAST,AVX2-FAST-PERLANE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VL-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VL-FAST,AVX512VL-FAST-CROSSLANE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VL-FAST,AVX512VL-FAST-PERLANE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=ALL,XOP,XOPAVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=ALL,XOP,XOPAVX2
+
+define { <8 x i32>, <8 x i32> } @splitTransposeDecode_8_avx2(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: splitTransposeDecode_8_avx2:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm5 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm6 = xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm2, %ymm2
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm1
+; AVX1-NEXT:    vmovaps %ymm2, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splitTransposeDecode_8_avx2:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[0,2,0,2]
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = <0,1,8,9,2,3,10,11,u,u,u,u,u,u,u,u,4,5,12,13,6,7,14,15,u,u,u,u,u,u,u,u>
+; AVX2-NEXT:    vpshufb %ymm3, %ymm2, %ymm2
+; AVX2-NEXT:    vpermq {{.*#+}} ymm4 = ymm0[0,2,0,2]
+; AVX2-NEXT:    vpshufb %ymm3, %ymm4, %ymm4
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm2 = ymm4[0],ymm2[0],ymm4[1],ymm2[1],ymm4[2],ymm2[2],ymm4[3],ymm2[3],ymm4[8],ymm2[8],ymm4[9],ymm2[9],ymm4[10],ymm2[10],ymm4[11],ymm2[11]
+; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[1,3,1,3]
+; AVX2-NEXT:    vpshufb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,3,1,3]
+; AVX2-NEXT:    vpshufb %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
+; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512VL-LABEL: splitTransposeDecode_8_avx2:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,16,8,24,1,17,9,25,2,18,10,26,3,19,11,27]
+; AVX512VL-NEXT:    vpermi2w %ymm1, %ymm0, %ymm2
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,20,12,28,5,21,13,29,6,22,14,30,7,23,15,31]
+; AVX512VL-NEXT:    vpermi2w %ymm1, %ymm0, %ymm3
+; AVX512VL-NEXT:    vmovdqa %ymm2, %ymm0
+; AVX512VL-NEXT:    vmovdqa %ymm3, %ymm1
+; AVX512VL-NEXT:    retq
+;
+; XOPAVX1-LABEL: splitTransposeDecode_8_avx2:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; XOPAVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; XOPAVX1-NEXT:    vpunpcklwd {{.*#+}} xmm5 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; XOPAVX1-NEXT:    vpunpckhwd {{.*#+}} xmm6 = xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
+; XOPAVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
+; XOPAVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm2, %ymm2
+; XOPAVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; XOPAVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; XOPAVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; XOPAVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; XOPAVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm1
+; XOPAVX1-NEXT:    vmovaps %ymm2, %ymm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: splitTransposeDecode_8_avx2:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[0,2,0,2]
+; XOPAVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = <0,1,8,9,2,3,10,11,u,u,u,u,u,u,u,u,4,5,12,13,6,7,14,15,u,u,u,u,u,u,u,u>
+; XOPAVX2-NEXT:    vpshufb %ymm3, %ymm2, %ymm2
+; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm4 = ymm0[0,2,0,2]
+; XOPAVX2-NEXT:    vpshufb %ymm3, %ymm4, %ymm4
+; XOPAVX2-NEXT:    vpunpcklwd {{.*#+}} ymm2 = ymm4[0],ymm2[0],ymm4[1],ymm2[1],ymm4[2],ymm2[2],ymm4[3],ymm2[3],ymm4[8],ymm2[8],ymm4[9],ymm2[9],ymm4[10],ymm2[10],ymm4[11],ymm2[11]
+; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[1,3,1,3]
+; XOPAVX2-NEXT:    vpshufb %ymm3, %ymm1, %ymm1
+; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,3,1,3]
+; XOPAVX2-NEXT:    vpshufb %ymm3, %ymm0, %ymm0
+; XOPAVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
+; XOPAVX2-NEXT:    vmovdqa %ymm2, %ymm0
+; XOPAVX2-NEXT:    retq
+  %shuffle.i = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  %shuffle.i59 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  %1 = bitcast <16 x i16> %shuffle.i to <8 x i32>
+  %2 = shufflevector <8 x i32> %1, <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+  %3 = bitcast <16 x i16> %shuffle.i59 to <8 x i32>
+  %4 = shufflevector <8 x i32> %3, <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+  %5 = insertvalue { <8 x i32>, <8 x i32> } undef, <8 x i32> %2, 0
+  %6 = insertvalue { <8 x i32>, <8 x i32> } %5, <8 x i32> %4, 1
+  ret { <8 x i32>, <8 x i32> } %6
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; ALL: {{.*}}
+; AVX1OR2: {{.*}}
+; AVX2-FAST: {{.*}}
+; AVX2-FAST-ALL: {{.*}}
+; AVX2-FAST-PERLANE: {{.*}}
+; AVX2-SLOW: {{.*}}
+; AVX2OR512VL: {{.*}}
+; AVX512VL-FAST: {{.*}}
+; AVX512VL-FAST-CROSSLANE: {{.*}}
+; AVX512VL-FAST-PERLANE: {{.*}}
+; AVX512VL-SLOW: {{.*}}
+; XOP: {{.*}}
-- 
2.7.4