MI.eraseFromParent();
}
+static bool matchUCharToFloat(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineFunction &MF, CombinerHelper &Helper) {
+ Register DstReg = MI.getOperand(0).getReg();
+
+ // TODO: We could try to match extracting the higher bytes, which would be
+ // easier if i8 vectors weren't promoted to i32 vectors, particularly after
+ // types are legalized. v4i8 -> v4f32 is probably the only case to worry
+ // about in practice.
+ LLT Ty = MRI.getType(DstReg);
+ if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
+ const APInt Mask = APInt::getHighBitsSet(32, 24);
+ return Helper.getKnownBits()->maskedValueIsZero(MI.getOperand(1).getReg(),
+ Mask);
+ }
+
+ return false;
+}
+
+static void applyUCharToFloat(MachineInstr &MI) {
+ MachineIRBuilder B(MI);
+
+ const LLT S32 = LLT::scalar(32);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT Ty = B.getMRI()->getType(DstReg);
+
+ if (Ty == S32) {
+ B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
+ {MI.getOperand(1)}, MI.getFlags());
+ } else {
+ auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
+ {MI.getOperand(1)}, MI.getFlags());
+ B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
+ }
+
+ MI.eraseFromParent();
+}
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: uitofp_char_to_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: uitofp_char_to_f32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 255
+ %2:_(s32) = G_AND %0, %1
+ %3:_(s32) = G_UITOFP %2
+ $vgpr0 = COPY %3
+...
+
+---
+name: uitofp_too_many_bits_to_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: uitofp_too_many_bits_to_f32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32)
+ ; CHECK: $vgpr0 = COPY [[UITOFP]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 256
+ %2:_(s32) = G_AND %0, %1
+ %3:_(s32) = G_UITOFP %2
+ $vgpr0 = COPY %3
+...
+
+---
+name: sitofp_char_to_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: sitofp_char_to_f32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 255
+ %2:_(s32) = G_AND %0, %1
+ %3:_(s32) = G_SITOFP %2
+ $vgpr0 = COPY %3
+...
+
+---
+name: sitofp_bits127_to_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: sitofp_bits127_to_f32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 127
+ %2:_(s32) = G_AND %0, %1
+ %3:_(s32) = G_SITOFP %2
+ $vgpr0 = COPY %3
+...
+
+---
+name: sitofp_bits128_to_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: sitofp_bits128_to_f32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 128
+ %2:_(s32) = G_AND %0, %1
+ %3:_(s32) = G_SITOFP %2
+ $vgpr0 = COPY %3
+...
+---
+name: sitofp_too_many_bits_to_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: sitofp_too_many_bits_to_f32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[AND]](s32)
+ ; CHECK: $vgpr0 = COPY [[SITOFP]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 256
+ %2:_(s32) = G_AND %0, %1
+ %3:_(s32) = G_SITOFP %2
+ $vgpr0 = COPY %3
+...
+
+---
+name: uitofp_char_to_f16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: uitofp_char_to_f16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
+ ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[AMDGPU_CVT_F32_UBYTE0_]](s32)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
+ ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 255
+ %2:_(s32) = G_AND %0, %1
+ %3:_(s16) = G_UITOFP %2
+ %4:_(s32) = G_ANYEXT %3
+ $vgpr0 = COPY %4
+...
+
+---
+name: sitofp_char_to_f16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: sitofp_char_to_f16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
+ ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[AMDGPU_CVT_F32_UBYTE0_]](s32)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
+ ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 255
+ %2:_(s32) = G_AND %0, %1
+ %3:_(s16) = G_SITOFP %2
+ %4:_(s32) = G_ANYEXT %3
+ $vgpr0 = COPY %4
+...