[VE] Support lowering to NND instruction

author Kazushi (Jam) Marukawa <marukawa@nec.com>

Tue, 9 Jun 2020 08:17:20 +0000 (10:17 +0200)

committer Simon Moll <simon.moll@emea.nec.com>

Tue, 9 Jun 2020 08:18:14 +0000 (10:18 +0200)
author Kazushi (Jam) Marukawa <marukawa@nec.com>
Tue, 9 Jun 2020 08:17:20 +0000 (10:17 +0200)
committer Simon Moll <simon.moll@emea.nec.com>
Tue, 9 Jun 2020 08:18:14 +0000 (10:18 +0200)
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp

index 6648d3928c1f7ddd81f3f758fcdd29efb9460390..9abffae413d4e43f2f83948acb78f3f7a5d8962f 100644 (file)
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -531,6 +531,30 @@ bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
    return true;
  }
  
+/// Return true if forming the and-not pattern (~X & Y) is worthwhile for Y.
+/// Vector values and constants are rejected; any other value is accepted.
+bool VETargetLowering::hasAndNot(SDValue Y) const {
+  // VE doesn't have a vector and-not instruction.
+  if (Y.getValueType().isVector())
+    return false;
+
+  // VE allows different immediate values for X and Y where ~X & Y.
+  // Only simm7 works for X, and only mimm works for Y on VE.  However, this
+  // function is used to check whether an immediate value is OK for and-not
+  // instruction as both X and Y.  Generating additional instruction to
+  // retrieve an immediate value is no good since the purpose of this
+  // function is to convert a series of 3 instructions to another series of
+  // 3 instructions with better parallelism.  Therefore, we return false
+  // for all immediate values now.
+  // FIXME: Change hasAndNot function to have two operands to make it work
+  //        correctly with Aurora VE.
+  if (isa<ConstantSDNode>(Y))
+    return false;
+
+  // It's ok for generic registers.
+  return true;
+}
+
  VETargetLowering::VETargetLowering(const TargetMachine &TM,
                                     const VESubtarget &STI)
      : TargetLowering(TM), Subtarget(&STI) {
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h

index 097960f05a830635f09b3f4b9cb3d66000b62a37..4633220efaa184f89c70850f8633ebdd395330f2 100644 (file)
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -101,6 +101,8 @@ public:
  
    // Block s/udiv lowering for now
    bool isIntDivCheap(EVT VT, AttributeList Attr) const override { return true; }
+
+  bool hasAndNot(SDValue Y) const override;
  };
  } // namespace llvm
  
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td

index e92cbb1e6ca214ed5dfca9a42e64976f889cccc4..76b3c657c124c67dd5bdf73e1b8c71d820e9dbe5 100644 (file)
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -1025,7 +1025,9 @@ let isCodeGenOnly = 1 in defm XOR32 : RRm<"xor", 0x46, I32, i32, xor>;
  defm EQV : RRm<"eqv", 0x47, I64, i64>;
  
  // Section 8.5.5 - NND (Negate AND)
-defm NND : RRNCm<"nnd", 0x54, I64, i64>;
+def and_not : PatFrags<(ops node:$x, node:$y),
+                       [(and (not node:$x), node:$y)]>;
+defm NND : RRNCm<"nnd", 0x54, I64, i64, and_not>;
  
  // Section 8.5.6 - MRG (Merge)
  defm MRG : RRMRGm<"mrg", 0x56, I64, i64>;
diff --git a/llvm/test/CodeGen/VE/cttz.ll b/llvm/test/CodeGen/VE/cttz.ll

index 82df4ee109c513dcd5db35899b2a9f4f8ff8e9e2..4b79a0f988e862badc108f977c27787124c67fe3 100644 (file)
--- a/llvm/test/CodeGen/VE/cttz.ll
+++ b/llvm/test/CodeGen/VE/cttz.ll
@@ -4,8 +4,7 @@ define i64 @func1(i64 %p) {
  ; CHECK-LABEL: func1:
  ; CHECK:       .LBB{{[0-9]+}}_2:
  ; CHECK-NEXT:    lea %s1, -1(, %s0)
-; CHECK-NEXT:    xor %s0, -1, %s0
-; CHECK-NEXT:    and %s0, %s0, %s1
+; CHECK-NEXT:    nnd %s0, %s0, %s1
  ; CHECK-NEXT:    pcnt %s0, %s0
  ; CHECK-NEXT:    or %s11, 0, %s9
    %r = tail call i64 @llvm.cttz.i64(i64 %p, i1 true)
diff --git a/llvm/test/CodeGen/VE/nnd.ll b/llvm/test/CodeGen/VE/nnd.ll

new file mode 100644 (file)

index 0000000..aea10d4
--- /dev/null
+++ b/llvm/test/CodeGen/VE/nnd.ll
@@ -0,0 +1,225 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+define signext i8 @func8s(i8 signext %a, i8 signext %b) {
+; CHECK-LABEL: func8s:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    xor %s0, -1, %s0
+; CHECK-NEXT:    and %s0, %s0, %s1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i8 %a, -1
+  %res = and i8 %not, %b
+  ret i8 %res
+}
+
+define zeroext i8 @func8z(i8 zeroext %a, i8 zeroext %b) {
+; CHECK-LABEL: func8z:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    xor %s0, -1, %s0
+; CHECK-NEXT:    and %s0, %s1, %s0
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i8 %a, -1
+  %res = and i8 %b, %not
+  ret i8 %res
+}
+
+define signext i8 @funci8s(i8 signext %a) {
+; CHECK-LABEL: funci8s:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    xor %s0, -1, %s0
+; CHECK-NEXT:    and %s0, 5, %s0
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i8 %a, -1
+  %res = and i8 %not, 5
+  ret i8 %res
+}
+
+define zeroext i8 @funci8z(i8 zeroext %a) {
+; CHECK-LABEL: funci8z:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    xor %s0, -1, %s0
+; CHECK-NEXT:    lea %s1, 251
+; CHECK-NEXT:    and %s0, %s0, %s1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i8 %a, -1
+  %res = and i8 -5, %not
+  ret i8 %res
+}
+
+define signext i16 @func16s(i16 signext %a, i16 signext %b) {
+; CHECK-LABEL: func16s:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    xor %s0, -1, %s0
+; CHECK-NEXT:    and %s0, %s0, %s1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i16 %a, -1
+  %res = and i16 %not, %b
+  ret i16 %res
+}
+
+define zeroext i16 @func16z(i16 zeroext %a, i16 zeroext %b) {
+; CHECK-LABEL: func16z:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    xor %s0, -1, %s0
+; CHECK-NEXT:    and %s0, %s1, %s0
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i16 %a, -1
+  %res = and i16 %b, %not
+  ret i16 %res
+}
+
+define signext i16 @funci16s(i16 signext %a) {
+; CHECK-LABEL: funci16s:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    xor %s0, -1, %s0
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i16 %a, -1
+  %res = and i16 %not, 65535
+  ret i16 %res
+}
+
+define zeroext i16 @funci16z(i16 zeroext %a) {
+; CHECK-LABEL: funci16z:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    xor %s0, -1, %s0
+; CHECK-NEXT:    and %s0, %s0, (52)0
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i16 %a, -1
+  %res = and i16 4095, %not
+  ret i16 %res
+}
+
+define signext i32 @func32s(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: func32s:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    xor %s0, -1, %s0
+; CHECK-NEXT:    and %s0, %s0, %s1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i32 %a, -1
+  %res = and i32 %not, %b
+  ret i32 %res
+}
+
+define zeroext i32 @func32z(i32 zeroext %a, i32 zeroext %b) {
+; CHECK-LABEL: func32z:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    xor %s0, -1, %s0
+; CHECK-NEXT:    and %s0, %s0, %s1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i32 %a, -1
+  %res = and i32 %not, %b
+  ret i32 %res
+}
+
+define signext i32 @funci32s(i32 signext %a) {
+; CHECK-LABEL: funci32s:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    xor %s0, -1, %s0
+; CHECK-NEXT:    and %s0, %s0, (36)0
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i32 %a, -1
+  %res = and i32 %not, 268435455
+  ret i32 %res
+}
+
+define zeroext i32 @funci32z(i32 zeroext %a) {
+; CHECK-LABEL: funci32z:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    xor %s0, -1, %s0
+; CHECK-NEXT:    and %s0, %s0, (36)0
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i32 %a, -1
+  %res = and i32 %not, 268435455
+  ret i32 %res
+}
+
+define i64 @func64(i64 %a, i64 %b) {
+; CHECK-LABEL: func64:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    nnd %s0, %s0, %s1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i64 %a, -1
+  %res = and i64 %not, %b
+  ret i64 %res
+}
+
+define i64 @func64_2(i64 %a, i64 %b) {
+; CHECK-LABEL: func64_2:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    nnd %s0, %s1, %s0
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i64 %b, -1
+  %res = and i64 %not, %a
+  ret i64 %res
+}
+
+define i64 @func64i(i64 %a) {
+; CHECK-LABEL: func64i:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    nnd %s0, %s0, (24)0
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i64 %a, -1
+  %res = and i64 %not, 1099511627775
+  ret i64 %res
+}
+
+define i128 @func128(i128 %a, i128 %b) {
+; CHECK-LABEL: func128:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    nnd %s0, %s0, %s2
+; CHECK-NEXT:    nnd %s1, %s1, %s3
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i128 %a, -1
+  %res = and i128 %b, %not
+  ret i128 %res
+}
+
+define i128 @funci128(i128 %a) {
+; CHECK-LABEL: funci128:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s1, 5, (0)1
+; CHECK-NEXT:    nnd %s0, %s0, %s1
+; CHECK-NEXT:    or %s1, 0, (0)1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %not = xor i128 %a, -1
+  %res = and i128 %not, 5
+  ret i128 %res
+}
+
+define i64 @func64_nnd_fold(i64 %x, i64 %y, i64 %m) {
+; CHECK-LABEL: func64_nnd_fold:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    nnd %s1, %s2, %s1
+; CHECK-NEXT:    and %s0, %s0, %s2
+; CHECK-NEXT:    or %s0, %s0, %s1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %D = xor i64 %x, %y
+  %A = and i64 %D, %m
+  %res = xor i64 %A, %y
+  ret i64 %res
+}
+
+define i64 @func64iy_nnd_fold(i64 %x, i64 %m) {
+; CHECK-LABEL: func64iy_nnd_fold:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    nnd %s0, %s0, %s1
+; CHECK-NEXT:    or %s1, -64, %s1
+; CHECK-NEXT:    nnd %s0, %s0, %s1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %D = xor i64 %x, -64
+  %A = and i64 %D, %m
+  %res = xor i64 %A, -64
+  ret i64 %res
+}
+
+define i64 @func64im_nnd_fold(i64 %x, i64 %y) {
+; CHECK-LABEL: func64im_nnd_fold:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    xor %s0, %s0, %s1
+; CHECK-NEXT:    and %s0, 30, %s0
+; CHECK-NEXT:    xor %s0, %s0, %s1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %D = xor i64 %x, %y
+  %A = and i64 %D, 30
+  %res = xor i64 %A, %y
+  ret i64 %res
+}
author	Kazushi (Jam) Marukawa <marukawa@nec.com>
	Tue, 9 Jun 2020 08:17:20 +0000 (10:17 +0200)
committer	Simon Moll <simon.moll@emea.nec.com>
	Tue, 9 Jun 2020 08:18:14 +0000 (10:18 +0200)
llvm/lib/Target/VE/VEISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/VE/VEISelLowering.h		patch \| blob \| history
llvm/lib/Target/VE/VEInstrInfo.td		patch \| blob \| history
llvm/test/CodeGen/VE/cttz.ll		patch \| blob \| history
llvm/test/CodeGen/VE/nnd.ll	[new file with mode: 0644]	patch \| blob