[AArch64][GlobalISel] Legalize arithmetic ops for <4 x s16>

author Amara Emerson <amara@apple.com>

Fri, 18 Sep 2020 23:45:12 +0000 (16:45 -0700)

committer Amara Emerson <amara@apple.com>

Sat, 19 Sep 2020 00:13:55 +0000 (17:13 -0700)
author Amara Emerson <amara@apple.com>
Fri, 18 Sep 2020 23:45:12 +0000 (16:45 -0700)
committer Amara Emerson <amara@apple.com>
Sat, 19 Sep 2020 00:13:55 +0000 (17:13 -0700)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

index a0af333db528cead924886721737d052737b56f5..b69a70bc0bbd11593580a4171172a7c76eb60615 100644 (file)
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -90,7 +90,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
        .widenScalarToNextPow2(0);
  
    getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
-      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
+      .legalFor({s32, s64, v2s32, v4s32, v2s64, v4s16, v8s16, v16s8})
        .clampScalar(0, s32, s64)
        .widenScalarToNextPow2(0)
        .clampNumElements(0, v2s32, v4s32)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll

index a90d899ec3aa4a2df566c712a3a614f921d6fc12..21ea67c5a1cc31f85e29759d123868bdf11d104e 100644 (file)
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -161,14 +161,6 @@ entry:
    ret i32 0
  }
  
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: %2:fpr(<4 x s16>) = G_ZEXT %0:fpr(<4 x s8>) (in function: zext_v4s8)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for zext_v4s8
-; FALLBACK-WITH-REPORT-OUT-LABEL: zext_v4s8
-define <4 x i16> @zext_v4s8(<4 x i8> %in) {
-  %ext = zext <4 x i8> %in to <4 x i16>
-  ret <4 x i16> %ext
-}
-
  ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: RET_ReallyLR implicit $x0 (in function: strict_align_feature)
  ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for strict_align_feature
  ; FALLBACK-WITH-REPORT-OUT-LABEL: strict_align_feature
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir

index abab5192ebb5e9ef9df3b0c83d3e96aaac329b23..5e0755836ce43812b8efd1854331b208e3b4b5a3 100644 (file)
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
@@ -172,3 +172,25 @@ body:             |
      RET_ReallyLR implicit $q0
  
  ...
+---
+name:            add_v4i16
+alignment:       4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.1:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: add_v4i16
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1
+    ; CHECK: [[ADD:%[0-9]+]]:_(<4 x s16>) = G_ADD [[COPY]], [[COPY1]]
+    ; CHECK: $d0 = COPY [[ADD]](<4 x s16>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<4 x s16>) = COPY $d0
+    %1:_(<4 x s16>) = COPY $d1
+    %2:_(<4 x s16>) = G_ADD %0, %1
+    $d0 = COPY %2(<4 x s16>)
+    RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll

index a5b7def829ebd64adc38557a71a998fb02fabbae..354f13dc1472f6082c638667d1eb3aa62b0fd2ec 100644 (file)
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -968,8 +968,16 @@ define <2 x i32> @abspattern1(<2 x i32> %a) nounwind {
  
  define <4 x i16> @abspattern2(<4 x i16> %a) nounwind {
  ; CHECK-LABEL: abspattern2:
-; CHECK: abs.4h
-; CHECK-NEXT: ret
+; DAG: abs.4h
+; DAG-NEXT: ret
+
+; For GlobalISel, this generates terrible code until we can pattern match this to abs.
+; GISEL-DAG: sub.4h
+; GISEL-DAG: cmgt.4h
+; GISEL: csel
+; GISEL: csel
+; GISEL: csel
+; GISEL: csel
          %tmp1neg = sub <4 x i16> zeroinitializer, %a
          %b = icmp sgt <4 x i16> %a, zeroinitializer
          %abs = select <4 x i1> %b, <4 x i16> %a, <4 x i16> %tmp1neg
author	Amara Emerson <amara@apple.com>
	Fri, 18 Sep 2020 23:45:12 +0000 (16:45 -0700)
committer	Amara Emerson <amara@apple.com>
	Sat, 19 Sep 2020 00:13:55 +0000 (17:13 -0700)
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp		patch \| blob \| history
llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll		patch \| blob \| history
llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir		patch \| blob \| history
llvm/test/CodeGen/AArch64/arm64-vabs.ll		patch \| blob \| history