From 7e9f348b2ddde685f65ad5031a06e36e86e7c6e2 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Sun, 18 Feb 2018 17:10:49 +0000 Subject: [PATCH] [AArch64][GlobalISel] Fix an assert fail/miscompile when fp16 types are copied to gpr register banks. PR36345. rdar://36478867 Differential Revision: https://reviews.llvm.org/D43310 llvm-svn: 325463 --- .../Target/AArch64/AArch64InstructionSelector.cpp | 25 ++++++++ .../CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir | 69 ++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index f1dfe00..64531d7 100644 --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -356,6 +356,31 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, return false; } + if (!TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + const RegClassOrRegBank &RegClassOrBank = + MRI.getRegClassOrRegBank(SrcReg); + + const TargetRegisterClass *SrcRC = + RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); + const RegisterBank *RB = nullptr; + if (!SrcRC) { + RB = RegClassOrBank.get<const RegisterBank *>(); + SrcRC = getRegClassForTypeOnBank(MRI.getType(SrcReg), *RB, RBI, true); + } + // Copies from fpr16 to gpr32 need to use SUBREG_TO_REG. + if (RC == &AArch64::GPR32allRegClass && SrcRC == &AArch64::FPR16RegClass) { + unsigned PromoteReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); + BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(AArch64::SUBREG_TO_REG)) + .addDef(PromoteReg) + .addImm(0) + .addUse(SrcReg) + .addImm(AArch64::hsub); + MachineOperand &RegOp = I.getOperand(1); + RegOp.setReg(PromoteReg); + } + } + // No need to constrain SrcReg. It will get constrained when // we hit another of its use or its defs. // Copies do not have constraints. 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir new file mode 100644 index 0000000..3a718cf --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir @@ -0,0 +1,69 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-unknown-unknown -o - -global-isel -verify-machineinstrs -run-pass=instruction-select %s | FileCheck %s + +# PR36345 +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-arm-none-eabi" + + %struct.struct2 = type { [2 x half] } + + @global_arg0 = common dso_local global %struct.struct2 zeroinitializer, align 2 + + ; Function Attrs: noinline nounwind optnone + define dso_local void @c_test([2 x half], [2 x half]* %addr) { + store [2 x half] %0, [2 x half]* %addr, align 2 + ret void + } +... +--- +name: c_test +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } + - { id: 6, class: gpr } + - { id: 7, class: gpr } + - { id: 8, class: gpr } + - { id: 9, class: gpr } + - { id: 10, class: gpr } + - { id: 11, class: gpr } + - { id: 12, class: gpr } +body: | + bb.1 (%ir-block.1): + liveins: $h0, $h1, $x0 + + ; CHECK-LABEL: name: c_test + ; CHECK: liveins: $h0, $h1, $x0 + ; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY $h0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1 + ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF + ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG]] + ; CHECK: [[BFMWri:%[0-9]+]]:gpr32 = BFMWri [[DEF]], [[COPY2]], 0, 15 + ; CHECK: [[SUBREG_TO_REG1:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub + ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG1]] + ; 
CHECK: [[BFMWri1:%[0-9]+]]:gpr32 = BFMWri [[BFMWri]], [[COPY3]], 16, 15 + ; CHECK: [[COPY4:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: STRWui [[BFMWri1]], [[COPY4]], 0 :: (store 4 into %ir.addr, align 2) + ; CHECK: RET_ReallyLR + %1:fpr(s16) = COPY $h0 + %2:fpr(s16) = COPY $h1 + %3:gpr(s32) = G_IMPLICIT_DEF + %11:gpr(s16) = COPY %1(s16) + %4:gpr(s32) = G_INSERT %3, %11(s16), 0 + %12:gpr(s16) = COPY %2(s16) + %5:gpr(s32) = G_INSERT %4, %12(s16), 16 + %0:gpr(s32) = COPY %5(s32) + %6:gpr(p0) = COPY $x0 + G_STORE %0(s32), %6(p0) :: (store 4 into %ir.addr, align 2) + RET_ReallyLR + +... -- 2.7.4