/// Generic byte swap.
HANDLE_TARGET_OPCODE(G_BSWAP)
+/// Generic floating-point ceil (the opcode the IRTranslator emits for the
+/// llvm.ceil intrinsic).
+HANDLE_TARGET_OPCODE(G_FCEIL)
+
/// Generic AddressSpaceCast.
HANDLE_TARGET_OPCODE(G_ADDRSPACE_CAST)
let hasSideEffects = 0;
}
+// Floating point ceiling of a value.
+// Takes a single source operand and defines a single result; both use the
+// same type index (type0), so source and result types always match.
+def G_FCEIL : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
//------------------------------------------------------------------------------
// Opcodes for LLVM Intrinsics
//------------------------------------------------------------------------------
def : GINodeEquiv<G_CTTZ_ZERO_UNDEF, cttz_zero_undef>;
def : GINodeEquiv<G_CTPOP, ctpop>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
+def : GINodeEquiv<G_FCEIL, fceil>;
// Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some
// complications that tablegen must take care of. For example, Predicates such
}
case Intrinsic::invariant_end:
return true;
+ case Intrinsic::ceil:
+ // Translate llvm.ceil into a single G_FCEIL: the def is the vreg of the
+ // call itself and the only use is the vreg of the first call argument.
+ MIRBuilder.buildInstr(TargetOpcode::G_FCEIL)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
}
return false;
}
getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64});
+ // G_FCEIL is marked legal only for the scalar and vector types listed
+ // below; no other action is registered for it here.
+ // TODO: Handle s16.
+ getActionDefinitionsBuilder(G_FCEIL)
+ .legalFor({s32, s64, v2s32, v4s32, v2s64});
+
getActionDefinitionsBuilder(G_INSERT)
.unsupportedIf([=](const LegalityQuery &Query) {
return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
call void @llvm.invariant.end.p0i8({}* %inv, i64 8, i8* %y)
ret void
}
+
+; llvm.ceil on float is expected to translate to one G_FCEIL producing s32.
+declare float @llvm.ceil.f32(float)
+define float @test_ceil_f32(float %x) {
+ ; CHECK-LABEL: name: test_ceil_f32
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}}
+ %y = call float @llvm.ceil.f32(float %x)
+ ret float %y
+}
+
+; llvm.ceil on double is expected to translate to one G_FCEIL producing s64.
+declare double @llvm.ceil.f64(double)
+define double @test_ceil_f64(double %x) {
+ ; CHECK-LABEL: name: test_ceil_f64
+ ; CHECK: %{{[0-9]+}}:_(s64) = G_FCEIL %{{[0-9]+}}
+ %y = call double @llvm.ceil.f64(double %x)
+ ret double %y
+}
+
+; llvm.ceil on <2 x float> is expected to translate to one G_FCEIL producing
+; <2 x s32>.
+declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
+define <2 x float> @test_ceil_v2f32(<2 x float> %x) {
+ ; CHECK-LABEL: name: test_ceil_v2f32
+ ; CHECK: %{{[0-9]+}}:_(<2 x s32>) = G_FCEIL %{{[0-9]+}}
+ %y = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x)
+ ret <2 x float> %y
+}
+
+; llvm.ceil on <4 x float> is expected to translate to one G_FCEIL producing
+; <4 x s32>. Only the generic opcode is verified here, matching the sibling
+; ceil tests; selection to FRINTPv4f32 is covered by the ceil_v4f32
+; instruction-select MIR test.
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
+define <4 x float> @test_ceil_v4f32(<4 x float> %x) {
+ ; CHECK-LABEL: name: test_ceil_v4f32
+ ; CHECK: %{{[0-9]+}}:_(<4 x s32>) = G_FCEIL %{{[0-9]+}}
+ %y = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x)
+ ret <4 x float> %y
+}
+
+; llvm.ceil on <2 x double> is expected to translate to one G_FCEIL producing
+; <2 x s64>.
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
+define <2 x double> @test_ceil_v2f64(<2 x double> %x) {
+ ; CHECK-LABEL: name: test_ceil_v2f64
+ ; CHECK: %{{[0-9]+}}:_(<2 x s64>) = G_FCEIL %{{[0-9]+}}
+ %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x)
+ ret <2 x double> %y
+}
#
# DEBUG-NEXT: G_BSWAP (opcode {{[0-9]+}}): 1 type index
# DEBUG: .. the first uncovered type index: 1, OK
+#
+# DEBUG-NEXT: G_FCEIL (opcode {{[0-9]+}}): 1 type index
+# DEBUG: .. the first uncovered type index: 1, OK
# CHECK-NOT: ill-defined
--- /dev/null
+# RUN: llc -verify-machineinstrs -mtriple aarch64--- \
+# RUN: -run-pass=instruction-select -global-isel %s -o - | FileCheck %s
+...
+---
+# An s32 G_FCEIL on fpr registers is expected to select to FRINTPSr (fpr32).
+name: ceil_float
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: ceil_float
+ ; CHECK: %{{[0-9]+}}:fpr32 = FRINTPSr %{{[0-9]+}}
+ liveins: $s0
+ %0:fpr(s32) = COPY $s0
+ %1:fpr(s32) = G_FCEIL %0
+ $s0 = COPY %1(s32)
+
+...
+---
+# An s64 G_FCEIL on fpr registers is expected to select to FRINTPDr (fpr64).
+name: ceil_double
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: ceil_double
+ ; CHECK: %{{[0-9]+}}:fpr64 = FRINTPDr %{{[0-9]+}}
+ liveins: $d0
+ %0:fpr(s64) = COPY $d0
+ %1:fpr(s64) = G_FCEIL %0
+ $d0 = COPY %1(s64)
+
+...
+---
+# A <2 x s32> G_FCEIL on fpr registers is expected to select to FRINTPv2f32
+# (fpr64).
+name: ceil_v2f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: ceil_v2f32
+ ; CHECK: %{{[0-9]+}}:fpr64 = FRINTPv2f32 %{{[0-9]+}}
+ liveins: $d0
+ %0:fpr(<2 x s32>) = COPY $d0
+ %1:fpr(<2 x s32>) = G_FCEIL %0
+ $d0 = COPY %1(<2 x s32>)
+
+...
+---
+# A <4 x s32> G_FCEIL on fpr registers is expected to select to FRINTPv4f32
+# (fpr128).
+name: ceil_v4f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: ceil_v4f32
+ ; CHECK: %{{[0-9]+}}:fpr128 = FRINTPv4f32 %{{[0-9]+}}
+ liveins: $q0
+ %0:fpr(<4 x s32>) = COPY $q0
+ %1:fpr(<4 x s32>) = G_FCEIL %0
+ $q0 = COPY %1(<4 x s32>)
+
+...
+---
+# A <2 x s64> G_FCEIL on fpr registers is expected to select to FRINTPv2f64
+# (fpr128).
+name: ceil_v2f64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: ceil_v2f64
+ ; CHECK: %{{[0-9]+}}:fpr128 = FRINTPv2f64 %{{[0-9]+}}
+ liveins: $q0
+ %0:fpr(<2 x s64>) = COPY $q0
+ %1:fpr(<2 x s64>) = G_FCEIL %0
+ $q0 = COPY %1(<2 x s64>)
+
+...
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -pass-remarks-missed=gisel-* \
+; RUN: -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | \
+; RUN: FileCheck %s --check-prefixes=FALLBACK,CHECK
define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind {
;CHECK-LABEL: fcvtas_2s:
declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) nounwind readnone
+; FALLBACK-NOT: remark{{.*}}frintp_2s
define <2 x float> @frintp_2s(<2 x float> %A) nounwind {
;CHECK-LABEL: frintp_2s:
;CHECK-NOT: ld1
ret <2 x float> %tmp3
}
+; FALLBACK-NOT: remark{{.*}}frintp_4s
define <4 x float> @frintp_4s(<4 x float> %A) nounwind {
;CHECK-LABEL: frintp_4s:
;CHECK-NOT: ld1
ret <4 x float> %tmp3
}
+; FALLBACK-NOT: remark{{.*}}frintp_2d
define <2 x double> @frintp_2d(<2 x double> %A) nounwind {
;CHECK-LABEL: frintp_2d:
;CHECK-NOT: ld1