From 22457f8e9bd6e21d890134db2b268636cebd4591 Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Wed, 30 Jan 2019 21:03:52 +0000 Subject: [PATCH] [GlobalISel][AArch64] Add instruction selection support for @llvm.sqrt This teaches the legalizer about G_FSQRT in AArch64. Also adds a legalizer test for G_FSQRT, a selection test for it, and updates existing floating point tests. https://reviews.llvm.org/D57361 llvm-svn: 352671 --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 2 + llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp | 2 +- .../lib/Target/AArch64/AArch64RegisterBankInfo.cpp | 1 + .../CodeGen/AArch64/GlobalISel/legalize-sqrt.mir | 86 ++++++++++++++ .../CodeGen/AArch64/GlobalISel/select-sqrt.mir | 130 +++++++++++++++++++++ .../test/CodeGen/AArch64/arm64-vfloatintrinsics.ll | 21 ++++ llvm/test/CodeGen/AArch64/f16-instructions.ll | 13 +++ 7 files changed, 254 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/legalize-sqrt.mir create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/select-sqrt.mir diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 5fb5aae..ee9163f 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1197,6 +1197,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FSIN: case TargetOpcode::G_FLOG10: case TargetOpcode::G_FLOG: + case TargetOpcode::G_FSQRT: assert(TypeIdx == 0); Observer.changingInstr(MI); @@ -1842,6 +1843,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_INTRINSIC_TRUNC: case G_FCOS: case G_FSIN: + case G_FSQRT: return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy); case G_ZEXT: case G_SEXT: diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 8496c92..e334a2d 100644 --- 
a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -124,7 +124,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64}); - getActionDefinitionsBuilder(G_FCEIL) + getActionDefinitionsBuilder({G_FCEIL, G_FSQRT}) // If we don't have full FP16 support, then scalarize the elements of // vectors containing fp16 types. .fewerElementsIf( diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp index 23d7e18..256a483 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -397,6 +397,7 @@ static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) { case TargetOpcode::G_FSIN: case TargetOpcode::G_FLOG10: case TargetOpcode::G_FLOG: + case TargetOpcode::G_FSQRT: return true; } return false; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sqrt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sqrt.mir new file mode 100644 index 0000000..9a6fe80 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sqrt.mir @@ -0,0 +1,86 @@ +# RUN: llc -mtriple=arm64-unknown-unknown -global-isel -O0 -mattr=-fullfp16 -run-pass=legalizer %s -o - | FileCheck %s + +--- | + define <8 x half> @test_v8f16.sqrt(<8 x half> %a) { + ret <8 x half> %a + } + + define <4 x half> @test_v4f16.sqrt(<4 x half> %a) { + ret <4 x half> %a + } + +... 
+--- +name: test_v8f16.sqrt +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: $q0 + ; CHECK-LABEL: name: test_v8f16.sqrt + %0:_(<8 x s16>) = COPY $q0 + ; CHECK: %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<8 x s16>) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(<8 x s16>) = G_BUILD_VECTOR %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16) + 
%1:_(<8 x s16>) = G_FSQRT %0 + $q0 = COPY %1(<8 x s16>) + RET_ReallyLR implicit $q0 + +... +--- +name: test_v4f16.sqrt +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: $d0 + ; CHECK-LABEL: name: test_v4f16.sqrt + %0:_(<4 x s16>) = COPY $d0 + ; CHECK: %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s16>) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(<4 x s16>) = G_BUILD_VECTOR %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16) + %1:_(<4 x s16>) = G_FSQRT %0 + $d0 = COPY %1(<4 x s16>) + RET_ReallyLR implicit $d0 + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-sqrt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-sqrt.mir new file mode 100644 index 0000000..250fc21 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-sqrt.mir @@ -0,0 +1,130 @@ +# RUN: llc -verify-machineinstrs -mtriple aarch64--- \ +# RUN: -run-pass=instruction-select -mattr=+fullfp16 -global-isel %s -o - \ +# RUN: | FileCheck %s +... 
+--- +name: sqrt_float +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +body: | + bb.0: + ; CHECK-LABEL: name: sqrt_float + ; CHECK: %{{[0-9]+}}:fpr32 = FSQRTSr %{{[0-9]+}} + liveins: $s0 + %0:fpr(s32) = COPY $s0 + %1:fpr(s32) = G_FSQRT %0 + $s0 = COPY %1(s32) + +... +--- +name: sqrt_double +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +body: | + bb.0: + ; CHECK-LABEL: name: sqrt_double + ; CHECK: %{{[0-9]+}}:fpr64 = FSQRTDr %{{[0-9]+}} + liveins: $d0 + %0:fpr(s64) = COPY $d0 + %1:fpr(s64) = G_FSQRT %0 + $d0 = COPY %1(s64) + +... +--- +name: sqrt_v2f32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +body: | + bb.0: + ; CHECK-LABEL: name: sqrt_v2f32 + ; CHECK: %{{[0-9]+}}:fpr64 = FSQRTv2f32 %{{[0-9]+}} + liveins: $d0 + %0:fpr(<2 x s32>) = COPY $d0 + %1:fpr(<2 x s32>) = G_FSQRT %0 + $d0 = COPY %1(<2 x s32>) + +... +--- +name: sqrt_v4f32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +body: | + bb.0: + ; CHECK-LABEL: name: sqrt_v4f32 + ; CHECK: %{{[0-9]+}}:fpr128 = FSQRTv4f32 %{{[0-9]+}} + liveins: $q0 + %0:fpr(<4 x s32>) = COPY $q0 + %1:fpr(<4 x s32>) = G_FSQRT %0 + $q0 = COPY %1(<4 x s32>) + +... +--- +name: sqrt_v2f64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +body: | + bb.0: + ; CHECK-LABEL: name: sqrt_v2f64 + ; CHECK: %{{[0-9]+}}:fpr128 = FSQRTv2f64 %{{[0-9]+}} + liveins: $q0 + %0:fpr(<2 x s64>) = COPY $q0 + %1:fpr(<2 x s64>) = G_FSQRT %0 + $q0 = COPY %1(<2 x s64>) + +... 
+--- +name: sqrt_v4f16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +body: | + bb.0: + ; CHECK-LABEL: name: sqrt_v4f16 + ; CHECK: %{{[0-9]+}}:fpr64 = FSQRTv4f16 %{{[0-9]+}} + liveins: $d0 + %0:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<4 x s16>) = G_FSQRT %0 + $d0 = COPY %1(<4 x s16>) + +... +--- +name: sqrt_v8f16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +body: | + bb.0: + ; CHECK-LABEL: name: sqrt_v8f16 + ; CHECK: %{{[0-9]+}}:fpr128 = FSQRTv8f16 %{{[0-9]+}} + liveins: $q0 + %0:fpr(<8 x s16>) = COPY $q0 + %1:fpr(<8 x s16>) = G_FSQRT %0 + $q0 = COPY %1(<8 x s16>) + +... diff --git a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll index 9eb5e43..125dfea 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll @@ -14,12 +14,18 @@ %v4f16 = type <4 x half> +; FALLBACK-NOT: remark{{.*}}test_v4f16.sqrt define %v4f16 @test_v4f16.sqrt(%v4f16 %a) { ; CHECK-LABEL: test_v4f16.sqrt: ; CHECK-NOFP16-COUNT-4: fsqrt s{{[0-9]+}}, s{{[0-9]+}} ; CHECK-FP16-NOT: fcvt ; CHECK-FP16: fsqrt.4h ; CHECK-FP16-NEXT: ret + ; GISEL-LABEL: test_v4f16.sqrt: + ; GISEL-NOFP16-COUNT-4: fsqrt s{{[0-9]+}}, s{{[0-9]+}} + ; GISEL-FP16-NOT: fcvt + ; GISEL-FP16: fsqrt.4h + ; GISEL-FP16-NEXT: ret %1 = call %v4f16 @llvm.sqrt.v4f16(%v4f16 %a) ret %v4f16 %1 } @@ -193,12 +199,18 @@ declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0 %v8f16 = type <8 x half> +; FALLBACK-NOT: remark{{.*}}test_v8f16.sqrt define %v8f16 @test_v8f16.sqrt(%v8f16 %a) { ; CHECK-LABEL: test_v8f16.sqrt: ; CHECK-NOFP16-COUNT-8: fsqrt s{{[0-9]+}}, s{{[0-9]+}} ; CHECK-FP16-NOT: fcvt ; CHECK-FP16: fsqrt.8h ; CHECK-FP16-NEXT: ret + ; GISEL-LABEL: test_v8f16.sqrt: + ; GISEL-NOFP16-COUNT-8: fsqrt s{{[0-9]+}}, s{{[0-9]+}} + ; GISEL-FP16-NOT: fcvt + ; GISEL-FP16: 
fsqrt.8h + ; GISEL-FP16-NEXT: ret %1 = call %v8f16 @llvm.sqrt.v8f16(%v8f16 %a) ret %v8f16 %1 } @@ -372,9 +384,12 @@ declare %v8f16 @llvm.nearbyint.v8f16(%v8f16) #0 %v2f32 = type <2 x float> +; FALLBACK-NOT: remark{{.*}}test_v2f32.sqrt ; CHECK-LABEL: test_v2f32.sqrt: +; GISEL-LABEL: test_v2f32.sqrt: define %v2f32 @test_v2f32.sqrt(%v2f32 %a) { ; CHECK: fsqrt.2s + ; GISEL: fsqrt.2s %1 = call %v2f32 @llvm.sqrt.v2f32(%v2f32 %a) ret %v2f32 %1 } @@ -513,9 +528,12 @@ declare %v2f32 @llvm.nearbyint.v2f32(%v2f32) #0 %v4f32 = type <4 x float> +; FALLBACK-NOT: remark{{.*}}test_v4f32.sqrt ; CHECK: test_v4f32.sqrt: +; GISEL: test_v4f32.sqrt: define %v4f32 @test_v4f32.sqrt(%v4f32 %a) { ; CHECK: fsqrt.4s + ; GISEL: fsqrt.4s %1 = call %v4f32 @llvm.sqrt.v4f32(%v4f32 %a) ret %v4f32 %1 } @@ -652,9 +670,12 @@ declare %v4f32 @llvm.nearbyint.v4f32(%v4f32) #0 ;;; Double vector %v2f64 = type <2 x double> +; FALLBACK-NOT: remark{{.*}}test_v2f64.sqrt ; CHECK: test_v2f64.sqrt: +; GISEL: test_v2f64.sqrt: define %v2f64 @test_v2f64.sqrt(%v2f64 %a) { ; CHECK: fsqrt.2d + ; GISEL: fsqrt.2d %1 = call %v2f64 @llvm.sqrt.v2f64(%v2f64 %a) ret %v2f64 %1 } diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll index b4952c5..df4f64e 100644 --- a/llvm/test/CodeGen/AArch64/f16-instructions.ll +++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll @@ -781,6 +781,9 @@ declare half @llvm.aarch64.neon.frecpe.f16(half %a) #0 declare half @llvm.aarch64.neon.frecpx.f16(half %a) #0 declare half @llvm.aarch64.neon.frsqrte.f16(half %a) #0 +; FALLBACK-NOT: remark:{{.*}}test_sqrt +; FALLBACK-FP16-NOT: remark:{{.*}}test_sqrt + ; CHECK-CVT-LABEL: test_sqrt: ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fsqrt s0, s0 @@ -791,6 +794,16 @@ declare half @llvm.aarch64.neon.frsqrte.f16(half %a) #0 ; CHECK-FP16-NEXT: fsqrt h0, h0 ; CHECK-FP16-NEXT: ret +; GISEL-CVT-LABEL: test_sqrt: +; GISEL-CVT-NEXT: fcvt s0, h0 +; GISEL-CVT-NEXT: fsqrt s0, s0 +; GISEL-CVT-NEXT: fcvt h0, s0 +; 
GISEL-CVT-NEXT: ret + +; GISEL-FP16-LABEL: test_sqrt: +; GISEL-FP16-NEXT: fsqrt h0, h0 +; GISEL-FP16-NEXT: ret + define half @test_sqrt(half %a) #0 { %r = call half @llvm.sqrt.f16(half %a) ret half %r -- 2.7.4