[X86] Recognise ROTLI/ROTRI rotations as faux shuffles

author Simon Pilgrim <llvm-dev@redking.me.uk>

Sun, 9 Feb 2020 12:25:19 +0000 (12:25 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Sun, 9 Feb 2020 12:25:49 +0000 (12:25 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 9 Feb 2020 12:25:19 +0000 (12:25 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 9 Feb 2020 12:25:49 +0000 (12:25 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 27363a9..d209b1e 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7443,6 +7443,24 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
      }
      return true;
    }
+  case X86ISD::VROTLI:
+  case X86ISD::VROTRI: {
+    // We can only decode 'whole byte' bit rotates as shuffles.
+    uint64_t RotateVal = N.getConstantOperandAPInt(1).urem(NumBitsPerElt);
+    if ((RotateVal % 8) != 0)
+      return false;
+    Ops.push_back(N.getOperand(0));
+    int NumBytesPerElt = NumBitsPerElt / 8;
+    int Offset = RotateVal / 8;
+    Offset = (X86ISD::VROTLI == Opcode ? NumBytesPerElt - Offset : Offset);
+    for (int i = 0; i != NumElts; ++i) {
+      int BaseIdx = i * NumBytesPerElt;
+      for (int j = 0; j != NumBytesPerElt; ++j) {
+        Mask.push_back(BaseIdx + ((Offset + j) % NumBytesPerElt));
+      }
+    }
+    return true;
+  }
    case X86ISD::VBROADCAST: {
      SDValue Src = N.getOperand(0);
      MVT SrcVT = Src.getSimpleValueType();
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll

index fd765fe..e42691d 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
@@ -79,8 +79,7 @@ define <16 x i16> @combine_vpermt2var_vpermi2var_16i16_as_unpcklwd(<16 x i16> %a
  define <16 x i8> @combine_shuffle_vrotri_v2i64(<2 x i64> %a0) {
  ; CHECK-LABEL: combine_shuffle_vrotri_v2i64:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vprorq $48, %xmm0, %xmm0
-; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[13,12,11,10,9,8,15,14,5,4,3,2,1,0,7,6]
  ; CHECK-NEXT:    ret{{[l|q]}}
    %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a0, <2 x i64> %a0, <2 x i64> <i64 48, i64 48>)
    %2 = bitcast <2 x i64> %1 to <16 x i8>
@@ -92,8 +91,7 @@ declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
  define <16 x i8> @combine_shuffle_vrotli_v4i32(<4 x i32> %a0) {
  ; CHECK-LABEL: combine_shuffle_vrotli_v4i32:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vprold $8, %xmm0, %xmm0
-; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,13,12,15,10,9,8,11,6,5,4,7,2,1,0,3]
  ; CHECK-NEXT:    ret{{[l|q]}}
    %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a0, <4 x i32> %a0, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
    %2 = bitcast <4 x i32> %1 to <16 x i8>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll

index 7320036..6460c52 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -255,8 +255,7 @@ define <4 x i32> @combine_vpperm_10zz32BA(<4 x i32> %a0, <4 x i32> %a1) {
  define <16 x i8> @combine_shuffle_proti_v2i64(<2 x i64> %a0) {
  ; CHECK-LABEL: combine_shuffle_proti_v2i64:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vprotq $16, %xmm0, %xmm0
-; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[13,12,11,10,9,8,15,14,5,4,3,2,1,0,7,6]
  ; CHECK-NEXT:    ret{{[l|q]}}
    %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a0, <2 x i64> %a0, <2 x i64> <i64 48, i64 48>)
    %2 = bitcast <2 x i64> %1 to <16 x i8>
@@ -268,8 +267,7 @@ declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
  define <16 x i8> @combine_shuffle_proti_v4i32(<4 x i32> %a0) {
  ; CHECK-LABEL: combine_shuffle_proti_v4i32:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vprotd $8, %xmm0, %xmm0
-; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,13,12,15,10,9,8,11,6,5,4,7,2,1,0,3]
  ; CHECK-NEXT:    ret{{[l|q]}}
    %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a0, <4 x i32> %a0, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
    %2 = bitcast <4 x i32> %1 to <16 x i8>
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sun, 9 Feb 2020 12:25:19 +0000 (12:25 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sun, 9 Feb 2020 12:25:49 +0000 (12:25 +0000)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll		patch | blob | history
llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll		patch | blob | history