From: Quentin Colombet <qcolombet@apple.com>
Date: Thu, 12 Mar 2015 19:34:12 +0000 (+0000)
Subject: [X86] Fix a regression introduced by r223641.
X-Git-Tag: llvmorg-3.7.0-rc1~9400
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f59b2d034cd7da4f9de6c56fea311ee0af849598;p=platform%2Fupstream%2Fllvm.git

[X86] Fix a regression introduced by r223641.
The permps and permd instructions have their operands swapped compared to the
intrinsic definition. Therefore, they do not fall into the INTR_TYPE_2OP
category.

I did not create a new category for those two, as they are the only one AFAICT
in that case.

<rdar://problem/20108262>

llvm-svn: 232085
---

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 366e4b9e..0533afc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14711,6 +14711,13 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
   switch (IntNo) {
   default: return SDValue();    // Don't custom lower most intrinsics.
 
+  case Intrinsic::x86_avx2_permd:
+  case Intrinsic::x86_avx2_permps:
+    // Operands intentionally swapped. Mask is last operand to intrinsic,
+    // but second operand for node/instruction.
+    return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
+                       Op.getOperand(2), Op.getOperand(1));
+
   case Intrinsic::x86_avx512_mask_valign_q_512:
   case Intrinsic::x86_avx512_mask_valign_d_512:
     // Vector source operands are swapped.
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index e436811..42256b2 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -175,8 +175,6 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx2_packsswb,     INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(avx2_packusdw,     INTR_TYPE_2OP, X86ISD::PACKUS, 0),
   X86_INTRINSIC_DATA(avx2_packuswb,     INTR_TYPE_2OP, X86ISD::PACKUS, 0),
-  X86_INTRINSIC_DATA(avx2_permd,        INTR_TYPE_2OP, X86ISD::VPERMV, 0),
-  X86_INTRINSIC_DATA(avx2_permps,       INTR_TYPE_2OP, X86ISD::VPERMV, 0),
   X86_INTRINSIC_DATA(avx2_phadd_d,      INTR_TYPE_2OP, X86ISD::HADD, 0),
   X86_INTRINSIC_DATA(avx2_phadd_w,      INTR_TYPE_2OP, X86ISD::HADD, 0),
   X86_INTRINSIC_DATA(avx2_phsub_d,      INTR_TYPE_2OP, X86ISD::HSUB, 0),
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
index b7e2b74..7b2d358 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -746,7 +746,10 @@ declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly
 
 
 define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
-  ; CHECK: vpermd
+  ; Check that the arguments are swapped between the intrinsic definition
+  ; and its lowering. Indeed, the offsets are the first source in
+  ; the instruction.
+  ; CHECK: vpermd %ymm0, %ymm1, %ymm0
   %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
   ret <8 x i32> %res
 }
@@ -754,7 +757,10 @@ declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
 
 
 define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x float> %a1) {
-  ; CHECK: vpermps
+  ; Check that the arguments are swapped between the intrinsic definition
+  ; and its lowering. Indeed, the offsets are the first source in
+  ; the instruction.
+  ; CHECK: vpermps %ymm0, %ymm1, %ymm0
   %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   ret <8 x float> %res
 }
diff --git a/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll b/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll
index fa81bf1..7f38494 100644
--- a/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll
@@ -442,7 +442,7 @@ define <8 x i32> @stack_fold_permd(<8 x i32> %a0, <8 x i32> %a1) {
   ;CHECK-LABEL: stack_fold_permd
   ;CHECK:   vpermd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1)
+  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a1, <8 x i32> %a0)
   ret <8 x i32> %2
 }
 declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
@@ -461,7 +461,7 @@ define <8 x float> @stack_fold_permps(<8 x float> %a0, <8 x float> %a1) {
   ;CHECK-LABEL: stack_fold_permps
   ;CHECK:       vpermps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1)
+  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x float> %a0)
   ret <8 x float> %2
 }
 declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly