From ea92283449f9b132531ef152625e3e799395e449 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Thu, 23 Sep 2021 18:13:58 +0200 Subject: [PATCH] [SystemZ] Implement ISD::BITCAST for fp128 -> i128. The type legalizer has by default no method of doing this bitcast other than storing and reloading the value from stack. This patch implements a custom lowering of this operation using extractions of subregs (z13 and earlier using FP128 register pairs), or of vector elements (with 'vector enhancements 1' using VR128 FP registers). Review: Ulrich Weigand Differential Revision: https://reviews.llvm.org/D110346 --- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 29 +++++++ llvm/test/CodeGen/SystemZ/fp-conv-19.ll | 103 ++++++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 llvm/test/CodeGen/SystemZ/fp-conv-19.ll diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 201dcae..de760c2 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -293,6 +293,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); + // Handle bitcast from fp128 to i128. + setOperationAction(ISD::BITCAST, MVT::i128, Custom); + // We have native instructions for i8, i16 and i32 extensions, but not i1. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); for (MVT VT : MVT::integer_valuetypes()) { @@ -5587,6 +5590,32 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N, Results.push_back(Res.getValue(2)); break; } + case ISD::BITCAST: { + SDValue Src = N->getOperand(0); + if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 && + !useSoftFloat()) { + SDLoc DL(N); + SDValue Lo, Hi; + if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) { + SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src); + Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC, + DAG.getConstant(1, DL, MVT::i32)); + Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC, + DAG.getConstant(0, DL, MVT::i32)); + } else { + assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass && + "Unrecognized register class for f128."); + SDValue LoFP = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, + DL, MVT::f64, Src); + SDValue HiFP = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, + DL, MVT::f64, Src); + Lo = DAG.getNode(ISD::BITCAST, DL, MVT::i64, LoFP); + Hi = DAG.getNode(ISD::BITCAST, DL, MVT::i64, HiFP); + } + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi)); + } + break; + } default: llvm_unreachable("Unexpected node to lower"); } diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-19.ll b/llvm/test/CodeGen/SystemZ/fp-conv-19.ll new file mode 100644 index 0000000..959ae8b --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-conv-19.ll @@ -0,0 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; Test f128 to i128 bitcasts. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=Z10 %s +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \ +; RUN: | FileCheck -check-prefix=Z14 %s + +define i64 @extract_float_hi(fp128* %0, fp128* %1) { +; Z10-LABEL: extract_float_hi: +; Z10: # %bb.0: # %entry +; Z10-NEXT: ld %f0, 0(%r2) +; Z10-NEXT: ld %f2, 8(%r2) +; Z10-NEXT: ld %f1, 0(%r3) +; Z10-NEXT: ld %f3, 8(%r3) +; Z10-NEXT: axbr %f1, %f0 +; Z10-NEXT: lgdr %r2, %f1 +; Z10-NEXT: br %r14 +; +; Z14-LABEL: extract_float_hi: +; Z14: # %bb.0: # %entry +; Z14-NEXT: vl %v0, 0(%r2), 3 +; Z14-NEXT: vl %v1, 0(%r3), 3 +; Z14-NEXT: wfaxb %v0, %v0, %v1 +; Z14-NEXT: vlgvg %r2, %v0, 0 +; Z14-NEXT: br %r14 +entry: + %x = load fp128, fp128* %0 + %y = load fp128, fp128* %1 + %add = fadd fp128 %x, %y + %2 = bitcast fp128 %add to i128 + %u.sroa.0.0.extract.shift = lshr i128 %2, 64 + %u.sroa.0.0.extract.trunc = trunc i128 %u.sroa.0.0.extract.shift to i64 + ret i64 %u.sroa.0.0.extract.trunc +} + +define i64 @extract_float_lo(fp128* %0, fp128* %1) { +; Z10-LABEL: extract_float_lo: +; Z10: # %bb.0: # %entry +; Z10-NEXT: ld %f0, 0(%r2) +; Z10-NEXT: ld %f2, 8(%r2) +; Z10-NEXT: ld %f1, 0(%r3) +; Z10-NEXT: ld %f3, 8(%r3) +; Z10-NEXT: axbr %f1, %f0 +; Z10-NEXT: lgdr %r2, %f3 +; Z10-NEXT: br %r14 +; +; Z14-LABEL: extract_float_lo: +; Z14: # %bb.0: # %entry +; Z14-NEXT: vl %v0, 0(%r2), 3 +; Z14-NEXT: vl %v1, 0(%r3), 3 +; Z14-NEXT: wfaxb %v0, %v0, %v1 +; Z14-NEXT: vlgvg %r2, %v0, 1 +; Z14-NEXT: br %r14 +entry: + %x = load fp128, fp128* %0 + %y = load fp128, fp128* %1 + %add = fadd fp128 %x, %y + %2 = bitcast fp128 %add to i128 + %u.sroa.0.0.extract.trunc = trunc i128 %2 to i64 + ret i64 %u.sroa.0.0.extract.trunc +} + +define i128 @bitcast_128(fp128* %0, fp128* %1) { +; Z10-LABEL: bitcast_128: +; Z10: # %bb.0: # %entry +; Z10-NEXT: ld %f0, 0(%r3) +; Z10-NEXT: ld %f2, 8(%r3) +; Z10-NEXT: ld %f1, 0(%r4) +; Z10-NEXT: ld %f3, 8(%r4) +; Z10-NEXT: axbr %f1, %f0 +; Z10-NEXT: lgdr %r0, %f3 +; Z10-NEXT: lgdr %r1, %f1 +; Z10-NEXT: oill %r1, 1 +; Z10-NEXT: oill %r0, 3 +; Z10-NEXT: stg %r0, 8(%r2) +; Z10-NEXT: stg %r1, 0(%r2) +; Z10-NEXT: br %r14 +; +; Z14-LABEL: bitcast_128: +; Z14: # %bb.0: # %entry +; Z14-NEXT: vl %v0, 0(%r3), 3 +; Z14-NEXT: vl %v1, 0(%r4), 3 +; Z14-NEXT: wfaxb %v0, %v0, %v1 +; Z14-NEXT: vlgvg %r0, %v0, 1 +; Z14-NEXT: vlgvg %r1, %v0, 0 +; Z14-NEXT: oill %r1, 1 +; Z14-NEXT: oill %r0, 3 +; Z14-NEXT: stg %r0, 8(%r2) +; Z14-NEXT: stg %r1, 0(%r2) +; Z14-NEXT: br %r14 +entry: + %x = load fp128, fp128* %0 + %y = load fp128, fp128* %1 + %add = fadd fp128 %x, %y + %i = bitcast fp128 %add to i128 + %hibit = shl i128 1, 64 + %i2 = or i128 %i, %hibit + %i3 = or i128 %i2, 3 + ret i128 %i3 +} -- 2.7.4