[AArch64][GlobalISel] Enable vector support for G_SELECT->G_FMAXIMUM/MINIMUM.

author Amara Emerson <amara@apple.com>

Mon, 3 Oct 2022 00:46:20 +0000 (01:46 +0100)

committer Amara Emerson <amara@apple.com>

Mon, 3 Oct 2022 20:39:52 +0000 (21:39 +0100)
author Amara Emerson <amara@apple.com>
Mon, 3 Oct 2022 00:46:20 +0000 (01:46 +0100)
committer Amara Emerson <amara@apple.com>
Mon, 3 Oct 2022 20:39:52 +0000 (21:39 +0100)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td

index fcf6418..4cbaa66 100644 (file)
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1031,7 +1031,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
      form_bitfield_extract, constant_fold, fabs_fneg_fold,
      intdiv_combines, mulh_combines, redundant_neg_operands,
      and_or_disjoint_mask, fma_combines, fold_binop_into_select,
-    sub_add_reg]>;
+    sub_add_reg, select_to_minmax]>;
  
  // A combine group used to for prelegalizer combiners at -O0. The combines in
  // this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

index 83d14ba..1fdc0d2 100644 (file)
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5985,8 +5985,7 @@ bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
    // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
    LLT DstTy = MRI.getType(Dst);
    // Bail out early on pointers, since we'll never want to fold to a min/max.
-  // TODO: Handle vectors.
-  if (DstTy.isPointer() || DstTy.isVector())
+  if (DstTy.isPointer())
      return false;
    // Match a floating point compare with a less-than/greater-than predicate.
    // TODO: Allow multiple users of the compare if they are all selects.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

index 541d4c6..d7448e4 100644 (file)
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -803,10 +803,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
        .libcallFor({s128})
        .minScalar(0, MinFPScalar);
  
-  // TODO: Vector types.
    getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
-      .legalFor({MinFPScalar, s32, s64})
-      .minScalar(0, MinFPScalar);
+      .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
+      .legalIf([=](const LegalityQuery &Query) {
+        const auto &Ty = Query.Types[0];
+        return (Ty == v8s16 || Ty == v4s16) && HasFP16;
+      })
+      .minScalar(0, MinFPScalar)
+      .clampNumElements(0, v4s16, v8s16)
+      .clampNumElements(0, v2s32, v4s32)
+      .clampNumElements(0, v2s64, v2s64);
  
    // TODO: Libcall support for s128.
    // TODO: s16 should be legal with full FP16 support.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fmaximum.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fmaximum.mir

index ae39676..0162898 100644 (file)
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fmaximum.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fmaximum.mir
@@ -9,13 +9,17 @@ body:             |
    bb.0:
      liveins: $h0, $h1
      ; FP16-LABEL: name: s16_legal_with_full_fp16
-    ; FP16: %a:_(s16) = COPY $h0
+    ; FP16: liveins: $h0, $h1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(s16) = COPY $h0
      ; FP16-NEXT: %b:_(s16) = COPY $h1
      ; FP16-NEXT: %legalize_me:_(s16) = G_FMAXIMUM %a, %b
      ; FP16-NEXT: $h0 = COPY %legalize_me(s16)
      ; FP16-NEXT: RET_ReallyLR implicit $h0
      ; NO-FP16-LABEL: name: s16_legal_with_full_fp16
-    ; NO-FP16: %a:_(s16) = COPY $h0
+    ; NO-FP16: liveins: $h0, $h1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(s16) = COPY $h0
      ; NO-FP16-NEXT: %b:_(s16) = COPY $h1
      ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %a(s16)
      ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %b(s16)
@@ -37,13 +41,17 @@ body:             |
    bb.0:
      liveins: $s0, $s1
      ; FP16-LABEL: name: s32_legal
-    ; FP16: %a:_(s32) = COPY $s0
+    ; FP16: liveins: $s0, $s1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(s32) = COPY $s0
      ; FP16-NEXT: %b:_(s32) = COPY $s1
      ; FP16-NEXT: %legalize_me:_(s32) = G_FMAXIMUM %a, %b
      ; FP16-NEXT: $s0 = COPY %legalize_me(s32)
      ; FP16-NEXT: RET_ReallyLR implicit $s0
      ; NO-FP16-LABEL: name: s32_legal
-    ; NO-FP16: %a:_(s32) = COPY $s0
+    ; NO-FP16: liveins: $s0, $s1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(s32) = COPY $s0
      ; NO-FP16-NEXT: %b:_(s32) = COPY $s1
      ; NO-FP16-NEXT: %legalize_me:_(s32) = G_FMAXIMUM %a, %b
      ; NO-FP16-NEXT: $s0 = COPY %legalize_me(s32)
@@ -62,13 +70,17 @@ body:             |
    bb.0:
      liveins: $d0, $d1
      ; FP16-LABEL: name: s64_legal
-    ; FP16: %a:_(s64) = COPY $d0
+    ; FP16: liveins: $d0, $d1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(s64) = COPY $d0
      ; FP16-NEXT: %b:_(s64) = COPY $d1
      ; FP16-NEXT: %legalize_me:_(s64) = G_FMAXIMUM %a, %b
      ; FP16-NEXT: $d0 = COPY %legalize_me(s64)
      ; FP16-NEXT: RET_ReallyLR implicit $d0
      ; NO-FP16-LABEL: name: s64_legal
-    ; NO-FP16: %a:_(s64) = COPY $d0
+    ; NO-FP16: liveins: $d0, $d1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(s64) = COPY $d0
      ; NO-FP16-NEXT: %b:_(s64) = COPY $d1
      ; NO-FP16-NEXT: %legalize_me:_(s64) = G_FMAXIMUM %a, %b
      ; NO-FP16-NEXT: $d0 = COPY %legalize_me(s64)
@@ -78,3 +90,62 @@ body:             |
      %legalize_me:_(s64) = G_FMAXIMUM %a, %b
      $d0 = COPY %legalize_me(s64)
      RET_ReallyLR implicit $d0
+...
+---
+name:            v2s32
+alignment:       4
+body:             |
+  bb.0:
+    liveins: $d0, $d1
+    ; FP16-LABEL: name: v2s32
+    ; FP16: liveins: $d0, $d1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(<2 x s32>) = COPY $d0
+    ; FP16-NEXT: %b:_(<2 x s32>) = COPY $d1
+    ; FP16-NEXT: %maximum:_(<2 x s32>) = G_FMAXIMUM %a, %b
+    ; FP16-NEXT: $d0 = COPY %maximum(<2 x s32>)
+    ; FP16-NEXT: RET_ReallyLR implicit $d0
+    ; NO-FP16-LABEL: name: v2s32
+    ; NO-FP16: liveins: $d0, $d1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(<2 x s32>) = COPY $d0
+    ; NO-FP16-NEXT: %b:_(<2 x s32>) = COPY $d1
+    ; NO-FP16-NEXT: %maximum:_(<2 x s32>) = G_FMAXIMUM %a, %b
+    ; NO-FP16-NEXT: $d0 = COPY %maximum(<2 x s32>)
+    ; NO-FP16-NEXT: RET_ReallyLR implicit $d0
+    %a:_(<2 x s32>) = COPY $d0
+    %b:_(<2 x s32>) = COPY $d1
+    %maximum:_(<2 x s32>) = G_FMAXIMUM %a, %b
+    $d0 = COPY %maximum(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            v4s32
+alignment:       4
+body:             |
+  bb.0:
+    liveins: $q0, $q1
+    ; FP16-LABEL: name: v4s32
+    ; FP16: liveins: $q0, $q1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
+    ; FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
+    ; FP16-NEXT: %maximum:_(<4 x s32>) = G_FMAXIMUM %a, %b
+    ; FP16-NEXT: $q0 = COPY %maximum(<4 x s32>)
+    ; FP16-NEXT: RET_ReallyLR implicit $q0
+    ; NO-FP16-LABEL: name: v4s32
+    ; NO-FP16: liveins: $q0, $q1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
+    ; NO-FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
+    ; NO-FP16-NEXT: %maximum:_(<4 x s32>) = G_FMAXIMUM %a, %b
+    ; NO-FP16-NEXT: $q0 = COPY %maximum(<4 x s32>)
+    ; NO-FP16-NEXT: RET_ReallyLR implicit $q0
+    %a:_(<4 x s32>) = COPY $q0
+    %b:_(<4 x s32>) = COPY $q1
+    %maximum:_(<4 x s32>) = G_FMAXIMUM %a, %b
+    $q0 = COPY %maximum(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fminimum.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fminimum.mir

index 47137b6..1c3c8bb 100644 (file)
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fminimum.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fminimum.mir
@@ -9,13 +9,17 @@ body:             |
    bb.0:
      liveins: $h0, $h1
      ; FP16-LABEL: name: s16_legal_with_full_fp16
-    ; FP16: %a:_(s16) = COPY $h0
+    ; FP16: liveins: $h0, $h1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(s16) = COPY $h0
      ; FP16-NEXT: %b:_(s16) = COPY $h1
      ; FP16-NEXT: %legalize_me:_(s16) = G_FMINIMUM %a, %b
      ; FP16-NEXT: $h0 = COPY %legalize_me(s16)
      ; FP16-NEXT: RET_ReallyLR implicit $h0
      ; NO-FP16-LABEL: name: s16_legal_with_full_fp16
-    ; NO-FP16: %a:_(s16) = COPY $h0
+    ; NO-FP16: liveins: $h0, $h1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(s16) = COPY $h0
      ; NO-FP16-NEXT: %b:_(s16) = COPY $h1
      ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %a(s16)
      ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %b(s16)
@@ -37,13 +41,17 @@ body:             |
    bb.0:
      liveins: $s0, $s1
      ; FP16-LABEL: name: s32_legal
-    ; FP16: %a:_(s32) = COPY $s0
+    ; FP16: liveins: $s0, $s1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(s32) = COPY $s0
      ; FP16-NEXT: %b:_(s32) = COPY $s1
      ; FP16-NEXT: %legalize_me:_(s32) = G_FMINIMUM %a, %b
      ; FP16-NEXT: $s0 = COPY %legalize_me(s32)
      ; FP16-NEXT: RET_ReallyLR implicit $s0
      ; NO-FP16-LABEL: name: s32_legal
-    ; NO-FP16: %a:_(s32) = COPY $s0
+    ; NO-FP16: liveins: $s0, $s1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(s32) = COPY $s0
      ; NO-FP16-NEXT: %b:_(s32) = COPY $s1
      ; NO-FP16-NEXT: %legalize_me:_(s32) = G_FMINIMUM %a, %b
      ; NO-FP16-NEXT: $s0 = COPY %legalize_me(s32)
@@ -62,13 +70,17 @@ body:             |
    bb.0:
      liveins: $d0, $d1
      ; FP16-LABEL: name: s64_legal
-    ; FP16: %a:_(s64) = COPY $d0
+    ; FP16: liveins: $d0, $d1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(s64) = COPY $d0
      ; FP16-NEXT: %b:_(s64) = COPY $d1
      ; FP16-NEXT: %legalize_me:_(s64) = G_FMINIMUM %a, %b
      ; FP16-NEXT: $d0 = COPY %legalize_me(s64)
      ; FP16-NEXT: RET_ReallyLR implicit $d0
      ; NO-FP16-LABEL: name: s64_legal
-    ; NO-FP16: %a:_(s64) = COPY $d0
+    ; NO-FP16: liveins: $d0, $d1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(s64) = COPY $d0
      ; NO-FP16-NEXT: %b:_(s64) = COPY $d1
      ; NO-FP16-NEXT: %legalize_me:_(s64) = G_FMINIMUM %a, %b
      ; NO-FP16-NEXT: $d0 = COPY %legalize_me(s64)
@@ -78,3 +90,77 @@ body:             |
      %legalize_me:_(s64) = G_FMINIMUM %a, %b
      $d0 = COPY %legalize_me(s64)
      RET_ReallyLR implicit $d0
+...
+---
+name:            v4s32
+alignment:       4
+body:             |
+  bb.0:
+    liveins: $q0, $q1
+    ; FP16-LABEL: name: v4s32
+    ; FP16: liveins: $q0, $q1
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
+    ; FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
+    ; FP16-NEXT: %minimum:_(<4 x s32>) = G_FMINIMUM %a, %b
+    ; FP16-NEXT: $q0 = COPY %minimum(<4 x s32>)
+    ; FP16-NEXT: RET_ReallyLR implicit $q0
+    ; NO-FP16-LABEL: name: v4s32
+    ; NO-FP16: liveins: $q0, $q1
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
+    ; NO-FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
+    ; NO-FP16-NEXT: %minimum:_(<4 x s32>) = G_FMINIMUM %a, %b
+    ; NO-FP16-NEXT: $q0 = COPY %minimum(<4 x s32>)
+    ; NO-FP16-NEXT: RET_ReallyLR implicit $q0
+    %a:_(<4 x s32>) = COPY $q0
+    %b:_(<4 x s32>) = COPY $q1
+    %minimum:_(<4 x s32>) = G_FMINIMUM %a, %b
+    $q0 = COPY %minimum(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+
+---
+name:            v8s32
+alignment:       4
+body:             |
+  bb.0:
+    liveins: $q0, $q1, $q2, $q3
+    ; FP16-LABEL: name: v8s32
+    ; FP16: liveins: $q0, $q1, $q2, $q3
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
+    ; FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
+    ; FP16-NEXT: %c:_(<4 x s32>) = COPY $q2
+    ; FP16-NEXT: %d:_(<4 x s32>) = COPY $q3
+    ; FP16-NEXT: [[FMINIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMINIMUM %a, %c
+    ; FP16-NEXT: [[FMINIMUM1:%[0-9]+]]:_(<4 x s32>) = G_FMINIMUM %b, %d
+    ; FP16-NEXT: $q0 = COPY [[FMINIMUM]](<4 x s32>)
+    ; FP16-NEXT: $q1 = COPY [[FMINIMUM1]](<4 x s32>)
+    ; FP16-NEXT: RET_ReallyLR implicit $q0
+    ; NO-FP16-LABEL: name: v8s32
+    ; NO-FP16: liveins: $q0, $q1, $q2, $q3
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: %a:_(<4 x s32>) = COPY $q0
+    ; NO-FP16-NEXT: %b:_(<4 x s32>) = COPY $q1
+    ; NO-FP16-NEXT: %c:_(<4 x s32>) = COPY $q2
+    ; NO-FP16-NEXT: %d:_(<4 x s32>) = COPY $q3
+    ; NO-FP16-NEXT: [[FMINIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMINIMUM %a, %c
+    ; NO-FP16-NEXT: [[FMINIMUM1:%[0-9]+]]:_(<4 x s32>) = G_FMINIMUM %b, %d
+    ; NO-FP16-NEXT: $q0 = COPY [[FMINIMUM]](<4 x s32>)
+    ; NO-FP16-NEXT: $q1 = COPY [[FMINIMUM1]](<4 x s32>)
+    ; NO-FP16-NEXT: RET_ReallyLR implicit $q0
+    %a:_(<4 x s32>) = COPY $q0
+    %b:_(<4 x s32>) = COPY $q1
+    %c:_(<4 x s32>) = COPY $q2
+    %d:_(<4 x s32>) = COPY $q3
+    %v1:_(<8 x s32>) = G_CONCAT_VECTORS %a, %b
+    %v2:_(<8 x s32>) = G_CONCAT_VECTORS %c, %d
+    %minimum:_(<8 x s32>) = G_FMINIMUM %v1, %v2
+    %uv1:_(<4 x s32>), %uv2:_(<4 x s32>) = G_UNMERGE_VALUES %minimum
+    $q0 = COPY %uv1(<4 x s32>)
+    $q1 = COPY %uv2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

index 4be1cf2..47097af 100644 (file)
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -510,11 +510,11 @@
  # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
  # DEBUG-NEXT: G_FMINIMUM (opcode {{[0-9]+}}): 1 type index
  # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
-# DEBUG-NEXT: .. the first uncovered type index: 1, OK
-# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
  # DEBUG-NEXT: G_FMAXIMUM (opcode {{[0-9]+}}): 1 type index
-# DEBUG-NEXT: .. the first uncovered type index: 1, OK
-# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
  # DEBUG-NEXT: G_PTR_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
  # DEBUG-NEXT: .. the first uncovered type index: 2, OK
  # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir

new file mode 100644 (file)

index 0000000..8c4300d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir
@@ -0,0 +1,188 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -mattr=+fullfp16 -run-pass=aarch64-prelegalizer-combiner -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+---
+name:            test_s16
+body:             |
+  bb.0:
+    liveins: $h0
+
+    ; CHECK-LABEL: name: test_s16
+    ; CHECK: liveins: $h0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
+    ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s16) = G_FMAXIMUM [[C]], [[COPY]]
+    ; CHECK-NEXT: $h0 = COPY [[FMAXIMUM]](s16)
+    ; CHECK-NEXT: RET_ReallyLR implicit $h0
+    %0:_(s16) = COPY $h0
+    %1:_(s16) = G_FCONSTANT half 0xH0000
+    %2:_(s1) = G_FCMP floatpred(olt), %0(s16), %1
+    %3:_(s16) = G_SELECT %2(s1), %1, %0
+    $h0 = COPY %3(s16)
+    RET_ReallyLR implicit $h0
+
+...
+---
+name:            test_s32
+body:             |
+  bb.0:
+    liveins: $s0
+
+    ; CHECK-LABEL: name: test_s32
+    ; CHECK: liveins: $s0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s32) = G_FMAXIMUM [[C]], [[COPY]]
+    ; CHECK-NEXT: $s0 = COPY [[FMAXIMUM]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $s0
+    %0:_(s32) = COPY $s0
+    %1:_(s32) = G_FCONSTANT float 0.000000e+00
+    %2:_(s1) = G_FCMP floatpred(olt), %0(s32), %1
+    %3:_(s32) = G_SELECT %2(s1), %1, %0
+    $s0 = COPY %3(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            test_s64
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: test_s64
+    ; CHECK: liveins: $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+    ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s64) = G_FMAXIMUM [[C]], [[COPY]]
+    ; CHECK-NEXT: $d0 = COPY [[FMAXIMUM]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:_(s64) = COPY $d0
+    %1:_(s64) = G_FCONSTANT double 0.000000e+00
+    %2:_(s1) = G_FCMP floatpred(olt), %0(s64), %1
+    %3:_(s64) = G_SELECT %2(s1), %1, %0
+    $d0 = COPY %3(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            test_s64_fmin
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: test_s64_fmin
+    ; CHECK: liveins: $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+    ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(s64) = G_FMINIMUM [[C]], [[COPY]]
+    ; CHECK-NEXT: $d0 = COPY [[FMINIMUM]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:_(s64) = COPY $d0
+    %1:_(s64) = G_FCONSTANT double 0.000000e+00
+    %2:_(s1) = G_FCMP floatpred(ogt), %0(s64), %1
+    %3:_(s64) = G_SELECT %2(s1), %1, %0
+    $d0 = COPY %3(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            test_v8s16
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: test_v8s16
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
+    ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<8 x s16>) = G_FMAXIMUM [[BUILD_VECTOR]], [[COPY]]
+    ; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<8 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(<8 x s16>) = COPY $q0
+    %2:_(s16) = G_FCONSTANT half 0xH0000
+    %1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16)
+    %3:_(<8 x s1>) = G_FCMP floatpred(olt), %0(<8 x s16>), %1
+    %4:_(<8 x s16>) = G_SELECT %3(<8 x s1>), %1, %0
+    $q0 = COPY %4(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            test_v4s32
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: test_v4s32
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMAXIMUM [[BUILD_VECTOR]], [[BITCAST]]
+    ; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %1:_(<2 x s64>) = COPY $q0
+    %0:_(<4 x s32>) = G_BITCAST %1(<2 x s64>)
+    %3:_(s32) = G_FCONSTANT float 0.000000e+00
+    %2:_(<4 x s32>) = G_BUILD_VECTOR %3(s32), %3(s32), %3(s32), %3(s32)
+    %4:_(<4 x s1>) = G_FCMP floatpred(olt), %0(<4 x s32>), %2
+    %5:_(<4 x s32>) = G_SELECT %4(<4 x s1>), %2, %0
+    $q0 = COPY %5(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            test_v2s64
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: test_v2s64
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMAXIMUM [[BUILD_VECTOR]], [[COPY]]
+    ; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(<2 x s64>) = COPY $q0
+    %2:_(s64) = G_FCONSTANT double 0.000000e+00
+    %1:_(<2 x s64>) = G_BUILD_VECTOR %2(s64), %2(s64)
+    %3:_(<2 x s1>) = G_FCMP floatpred(olt), %0(<2 x s64>), %1
+    %4:_(<2 x s64>) = G_SELECT %3(<2 x s1>), %1, %0
+    $q0 = COPY %4(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            test_v2s64_fmin
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: test_v2s64_fmin
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMINIMUM [[BUILD_VECTOR]], [[COPY]]
+    ; CHECK-NEXT: $q0 = COPY [[FMINIMUM]](<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(<2 x s64>) = COPY $q0
+    %2:_(s64) = G_FCONSTANT double 0.000000e+00
+    %1:_(<2 x s64>) = G_BUILD_VECTOR %2(s64), %2(s64)
+    %3:_(<2 x s1>) = G_FCMP floatpred(ogt), %0(<2 x s64>), %1
+    %4:_(<2 x s64>) = G_SELECT %3(<2 x s1>), %1, %0
+    $q0 = COPY %4(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll b/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll

new file mode 100644 (file)

index 0000000..1986e79
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=1 -verify-machineinstrs %s -o - | FileCheck %s
+define half @test_s16(half %a) #0 {
+; CHECK-LABEL: test_s16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    fmax h0, h1, h0
+; CHECK-NEXT:    ret
+entry:
+  %fcmp = fcmp olt half %a, 0.0
+  %sel = select i1 %fcmp, half 0.0, half %a
+  ret half %sel
+}
+
+define float @test_s32(float %a) #0 {
+; CHECK-LABEL: test_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    fmax s0, s1, s0
+; CHECK-NEXT:    ret
+entry:
+  %fcmp = fcmp olt float %a, 0.0
+  %sel = select i1 %fcmp, float 0.0, float %a
+  ret float %sel
+}
+
+define double @test_s64(double %a) #0 {
+; CHECK-LABEL: test_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    fmax d0, d1, d0
+; CHECK-NEXT:    ret
+entry:
+  %fcmp = fcmp olt double %a, 0.0
+  %sel = select i1 %fcmp, double 0.0, double %a
+  ret double %sel
+}
+
+define <4 x half> @test_v4s16(<4 x half> %a) #0 {
+; CHECK-LABEL: test_v4s16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    dup v1.4h, v1.h[0]
+; CHECK-NEXT:    fmax v0.4h, v1.4h, v0.4h
+; CHECK-NEXT:    ret
+entry:
+  %fcmp = fcmp olt <4 x half> %a, zeroinitializer
+  %sel = select <4 x i1> %fcmp, <4 x half> zeroinitializer, <4 x half> %a
+  ret <4 x half> %sel
+}
+
+define <8 x half> @test_v8s16(<8 x half> %a) #0 {
+; CHECK-LABEL: test_v8s16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    dup v1.8h, v1.h[0]
+; CHECK-NEXT:    fmax v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+entry:
+  %fcmp = fcmp olt <8 x half> %a, zeroinitializer
+  %sel = select <8 x i1> %fcmp, <8 x half> zeroinitializer, <8 x half> %a
+  ret <8 x half> %sel
+}
+
+define <2 x float> @test_v2s32(<2 x float> %a) #0 {
+; CHECK-LABEL: test_v2s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    dup v1.2s, v1.s[0]
+; CHECK-NEXT:    fmax v0.2s, v1.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %fcmp = fcmp olt <2 x float> %a, zeroinitializer
+  %sel = select <2 x i1> %fcmp, <2 x float> zeroinitializer, <2 x float> %a
+  ret <2 x float> %sel
+}
+
+define <4 x float> @test_v4s32(<4 x float> %a) #0 {
+; CHECK-LABEL: test_v4s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    dup v1.4s, v1.s[0]
+; CHECK-NEXT:    fmax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %fcmp = fcmp olt <4 x float> %a, zeroinitializer
+  %sel = select <4 x i1> %fcmp, <4 x float> zeroinitializer, <4 x float> %a
+  ret <4 x float> %sel
+}
+
+define <2 x double> @test_v2s64(<2 x double> %a) #0 {
+; CHECK-LABEL: test_v2s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    dup v1.2d, v1.d[0]
+; CHECK-NEXT:    fmax v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %fcmp = fcmp olt <2 x double> %a, zeroinitializer
+  %sel = select <2 x i1> %fcmp, <2 x double> zeroinitializer, <2 x double> %a
+  ret <2 x double> %sel
+}
+
author	Amara Emerson <amara@apple.com>
	Mon, 3 Oct 2022 00:46:20 +0000 (01:46 +0100)
committer	Amara Emerson <amara@apple.com>
	Mon, 3 Oct 2022 20:39:52 +0000 (21:39 +0100)
llvm/include/llvm/Target/GlobalISel/Combine.td		patch \| blob \| history
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp		patch \| blob \| history
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp		patch \| blob \| history
llvm/test/CodeGen/AArch64/GlobalISel/legalize-fmaximum.mir		patch \| blob \| history
llvm/test/CodeGen/AArch64/GlobalISel/legalize-fminimum.mir		patch \| blob \| history
llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir		patch \| blob \| history
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll	[new file with mode: 0644]	patch \| blob