From: Amara Emerson Date: Thu, 19 Aug 2021 22:45:50 +0000 (-0700) Subject: [AArch64][GlobalISel] Fix miscompile of <16 x s8> G_EXTRACT_VECTOR_ELT. X-Git-Tag: upstream/15.0.7~33447 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a0051f71499bbe92fd15e9fb25005e7c55f6e9bd;p=platform%2Fupstream%2Fllvm.git [AArch64][GlobalISel] Fix miscompile of <16 x s8> G_EXTRACT_VECTOR_ELT. When support for copying vector s8 lanes was added recently, this also had the side effect of fixing a fallback for <16 x s8> extracts since both used the same helper. However, there was a bug in another helper to get the regclass for a specific FPR-native type, which was assigning FPR16 to s8 instead of FPR8. --- diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 336cb7e..bce7f94a 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -496,14 +496,18 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, } if (RB.getID() == AArch64::FPRRegBankID) { - if (Ty.getSizeInBits() <= 16) + switch (Ty.getSizeInBits()) { + case 8: + return &AArch64::FPR8RegClass; + case 16: return &AArch64::FPR16RegClass; - if (Ty.getSizeInBits() == 32) + case 32: return &AArch64::FPR32RegClass; - if (Ty.getSizeInBits() == 64) + case 64: return &AArch64::FPR64RegClass; - if (Ty.getSizeInBits() == 128) + case 128: return &AArch64::FPR128RegClass; + } return nullptr; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir index 919fec7..e157c73 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir @@ -211,6 +211,35 @@ body: | RET_ReallyLR implicit $h0 ... --- +name: v16s8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$q0' } +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: v16s8 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr8 = COPY [[COPY]].bsub + ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[COPY1]], %subreg.bsub + ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]] + ; CHECK: $w0 = COPY [[COPY2]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:fpr(<16 x s8>) = COPY $q0 + %2:gpr(s64) = G_CONSTANT i64 0 + %1:fpr(s8) = G_EXTRACT_VECTOR_ELT %0(<16 x s8>), %2(s64) + %4:gpr(s8) = COPY %1(s8) + %3:gpr(s32) = G_ANYEXT %4(s8) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- name: v2p0 alignment: 4 legalized: true